aboutsummaryrefslogtreecommitdiff
path: root/compute_kernel_writer
diff options
context:
space:
mode:
authorGunes Bayir <gunes.bayir@arm.com>2023-08-23 23:56:54 +0100
committerGunes Bayir <gunes.bayir@arm.com>2023-08-29 13:48:09 +0000
commit362e1e07b958e649af5aa459babe8b309cda45d7 (patch)
tree3940f71a7875d1dff3fcabb6e6065f6d2c15acce /compute_kernel_writer
parent2d0c2f5700434a4b0c3345c71a3a45825a0e6766 (diff)
downloadComputeLibrary-362e1e07b958e649af5aa459babe8b309cda45d7.tar.gz
Fix load/store tests in CKW
The tests were disabled because some tests require constants, and constant tile declaration was not present. Partially Resolves: COMPMID-5791, COMPMID-6389 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Change-Id: Icf8a901e9b552207bff1366955b88ec46d47bf04 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10211 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'compute_kernel_writer')
-rw-r--r--compute_kernel_writer/src/cl/CLKernelWriter.cpp10
-rw-r--r--compute_kernel_writer/validation/Validation.cpp2
-rw-r--r--compute_kernel_writer/validation/tests/CLKernelWriterOpLoadStoreTest.h84
3 files changed, 54 insertions, 42 deletions
diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.cpp b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
index 90707ccbb1..c64b7415ff 100644
--- a/compute_kernel_writer/src/cl/CLKernelWriter.cpp
+++ b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
@@ -525,8 +525,8 @@ const CLTile &CLKernelWriter::to_cl_tile(const TileOperand &operand) const
void CLKernelWriter::op_load(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
{
- const CLTile dilation_x("1", DataType::Int32);
- const CLTile dilation_y("1", DataType::Int32);
+ const CLTile dilation_x({{"1"}}, DataType::Int32);
+ const CLTile dilation_y({{"1"}}, DataType::Int32);
op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y);
}
@@ -544,8 +544,8 @@ void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOpe
void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
{
- const CLTile dilation_x("1", DataType::Int32);
- const CLTile dilation_y("1", DataType::Int32);
+ const CLTile dilation_x({{"1"}}, DataType::Int32);
+ const CLTile dilation_y({{"1"}}, DataType::Int32);
op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y);
}
@@ -565,7 +565,7 @@ void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_o
const CLTile &dilation_x, const CLTile &dilation_y)
{
CKW_UNUSED(dilation_x);
- CKW_ASSERT(dilation_x.scalar(0, 0).str == "1"); // Dilation in x dimension is not implemented yet
+ CKW_ASSERT(dilation_x.scalar(0, 0).str == "((int)(1))"); // Dilation in x dimension is not implemented yet
ITensor &tensor = get_tensor(tensor_op);
diff --git a/compute_kernel_writer/validation/Validation.cpp b/compute_kernel_writer/validation/Validation.cpp
index 6425d25f2b..c8d0f6b45d 100644
--- a/compute_kernel_writer/validation/Validation.cpp
+++ b/compute_kernel_writer/validation/Validation.cpp
@@ -120,7 +120,7 @@ int32_t main()
tests.push_back(test22.get());
tests.push_back(test23.get());
tests.push_back(test24.get());
- CKW_UNUSED(test25); // CLKernelWriterOpLoadStoreTest test needs further changes.
+ tests.push_back(test25.get());
tests.push_back(test26.get());
tests.push_back(test27.get());
tests.push_back(test28.get());
diff --git a/compute_kernel_writer/validation/tests/CLKernelWriterOpLoadStoreTest.h b/compute_kernel_writer/validation/tests/CLKernelWriterOpLoadStoreTest.h
index 0f4afc8bf3..5702f19ce5 100644
--- a/compute_kernel_writer/validation/tests/CLKernelWriterOpLoadStoreTest.h
+++ b/compute_kernel_writer/validation/tests/CLKernelWriterOpLoadStoreTest.h
@@ -93,79 +93,79 @@ public:
{
// Cases
const std::string load_fp_2x3_tile = R"_(
-tile_0 = vload3(0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (y + 0) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (b) * G0__tensor_stride3));
-tile_1 = vload3(0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (y + 1) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (b) * G0__tensor_stride3));
+G0__tile__0 = vload3(0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (G0__y + 0 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (G0__b) * G0__tensor_stride3));
+G0__tile__1 = vload3(0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (G0__y + 1 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (G0__b) * G0__tensor_stride3));
)_";
const std::string load_half_2x4_tile_image_clamp_y = R"_(
-tile_0 = read_imageh(G0__tensor_img2d, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST, (int2)((x) >> 2, (y + 0 + (z) * G0__tensor_dim1 + (b) * G0__tensor_dim1 * G0__tensor_dim2)));
-tile_1 = read_imageh(G0__tensor_img2d, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST, (int2)((x) >> 2, (y + 1 + (z) * G0__tensor_dim1 + (b) * G0__tensor_dim1 * G0__tensor_dim2)));
+G0__tile__0 = read_imageh(G0__tensor_img2d, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST, (int2)((G0__x) >> 2, (G0__y + 0 * ((int)(1)) + (G0__z) * G0__tensor_dim1 + (G0__b) * G0__tensor_dim1 * G0__tensor_dim2)));
+G0__tile__1 = read_imageh(G0__tensor_img2d, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST, (int2)((G0__x) >> 2, (G0__y + 1 * ((int)(1)) + (G0__z) * G0__tensor_dim1 + (G0__b) * G0__tensor_dim1 * G0__tensor_dim2)));
)_";
const std::string store_fp_2x3_tile = R"_(
-vstore3(tile_0, 0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (y + 0) * G0__tensor_stride1 + (b) * G0__tensor_stride3));
-vstore3(tile_1, 0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (y + 1) * G0__tensor_stride1 + (b) * G0__tensor_stride3));
+vstore3(G0__tile__0, 0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (G0__y + 0 * ((int)(1))) * G0__tensor_stride1 + (G0__b) * G0__tensor_stride3));
+vstore3(G0__tile__1, 0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (G0__y + 1 * ((int)(1))) * G0__tensor_stride1 + (G0__b) * G0__tensor_stride3));
)_";
const std::string store_int8_4x4_y_dilation_batch_eq_0 = R"_(
-vstore4(tile_0, 0, (__global char*)(G0__tensor_ptr + (1) * sizeof(char) + (y + 0 * y_dilation) * G0__tensor_stride1 + (z) * G0__tensor_stride2));
-vstore4(tile_1, 0, (__global char*)(G0__tensor_ptr + (1) * sizeof(char) + (y + 1 * y_dilation) * G0__tensor_stride1 + (z) * G0__tensor_stride2));
-vstore4(tile_2, 0, (__global char*)(G0__tensor_ptr + (1) * sizeof(char) + (y + 2 * y_dilation) * G0__tensor_stride1 + (z) * G0__tensor_stride2));
-vstore4(tile_3, 0, (__global char*)(G0__tensor_ptr + (1) * sizeof(char) + (y + 3 * y_dilation) * G0__tensor_stride1 + (z) * G0__tensor_stride2));
+vstore4(G0__tile__0, 0, (__global char*)(G0__tensor_ptr + (((int)(1))) * sizeof(char) + (G0__y + 0 * G0__y_dilation) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(0))) * G0__tensor_stride3));
+vstore4(G0__tile__1, 0, (__global char*)(G0__tensor_ptr + (((int)(1))) * sizeof(char) + (G0__y + 1 * G0__y_dilation) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(0))) * G0__tensor_stride3));
+vstore4(G0__tile__2, 0, (__global char*)(G0__tensor_ptr + (((int)(1))) * sizeof(char) + (G0__y + 2 * G0__y_dilation) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(0))) * G0__tensor_stride3));
+vstore4(G0__tile__3, 0, (__global char*)(G0__tensor_ptr + (((int)(1))) * sizeof(char) + (G0__y + 3 * G0__y_dilation) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(0))) * G0__tensor_stride3));
)_";
// tensor dimension is 10
const std::string load_fp_2x3_tile_x_overlapping_min_y_eq_0_batch_eq_1 = R"_(
-if(x > 0)
+if(G0__x > 0)
{
-tile_0 = vload3(0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (0 + 0) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (1) * G0__tensor_stride3));
-tile_1 = vload3(0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (0 + 1) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (1) * G0__tensor_stride3));
+G0__tile__0 = vload3(0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (((int)(0)) + 0 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(1))) * G0__tensor_stride3));
+G0__tile__1 = vload3(0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (((int)(0)) + 1 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(1))) * G0__tensor_stride3));
}
else
{
-tile_0.s0 = *((__global float*)(G0__tensor_ptr + (x + 0) * sizeof(float) + (0 + 0) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (1) * G0__tensor_stride3));
-tile_1.s0 = *((__global float*)(G0__tensor_ptr + (x + 0) * sizeof(float) + (0 + 1) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (1) * G0__tensor_stride3));
+G0__tile__0.s0 = *((__global float*)(G0__tensor_ptr + (G0__x + 0) * sizeof(float) + (((int)(0)) + 0 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(1))) * G0__tensor_stride3));
+G0__tile__1.s0 = *((__global float*)(G0__tensor_ptr + (G0__x + 0) * sizeof(float) + (((int)(0)) + 1 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(1))) * G0__tensor_stride3));
}
)_";
const std::string store_fp_2x3_tile_x_overlapping_min_y_clamp_to_border_max_only = R"_(
-if(x > 0)
+if(G0__x > 0)
{
-if(y + 0 < G0__tensor_dim1)
+if(G0__y + 0 * ((int)(1)) < G0__tensor_dim1)
{
-vstore3(tile_0, 0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (y + 0) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (b) * G0__tensor_stride3));
+vstore3(G0__tile__0, 0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (G0__y + 0 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (G0__b) * G0__tensor_stride3));
}
else
{
-tile_0 = 0.0f;
+G0__tile__0 = 0.0f;
}
-if(y + 1 < G0__tensor_dim1)
+if(G0__y + 1 * ((int)(1)) < G0__tensor_dim1)
{
-vstore3(tile_1, 0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (y + 1) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (b) * G0__tensor_stride3));
+vstore3(G0__tile__1, 0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (G0__y + 1 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (G0__b) * G0__tensor_stride3));
}
else
{
-tile_1 = 0.0f;
+G0__tile__1 = 0.0f;
}
}
else
{
-if(y + 0 < G0__tensor_dim1)
+if(G0__y + 0 * ((int)(1)) < G0__tensor_dim1)
{
-*((__global float*)(G0__tensor_ptr + (x + 0) * sizeof(float) + (y + 0) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (b) * G0__tensor_stride3)) = tile_0.s0;
+*((__global float*)(G0__tensor_ptr + (G0__x + 0) * sizeof(float) + (G0__y + 0 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (G0__b) * G0__tensor_stride3)) = G0__tile__0.s0;
}
else
{
-tile_0.s0 = 0.0f;
+G0__tile__0.s0 = 0.0f;
}
-if(y + 1 < G0__tensor_dim1)
+if(G0__y + 1 * ((int)(1)) < G0__tensor_dim1)
{
-*((__global float*)(G0__tensor_ptr + (x + 0) * sizeof(float) + (y + 1) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (b) * G0__tensor_stride3)) = tile_1.s0;
+*((__global float*)(G0__tensor_ptr + (G0__x + 0) * sizeof(float) + (G0__y + 1 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (G0__b) * G0__tensor_stride3)) = G0__tile__1.s0;
}
else
{
-tile_1.s0 = 0.0f;
+G0__tile__1.s0 = 0.0f;
}
}
)_";
const std::string store_half_2x4_tile_x_image_y_dilation = R"_(
-write_imageh(G0__tensor_img2d, (int2)((x) >> 2, (0 + 0 * y_dilation + (z) * G0__tensor_dim1 + (1) * G0__tensor_dim1 * G0__tensor_dim2)), tile_0);
-write_imageh(G0__tensor_img2d, (int2)((x) >> 2, (0 + 1 * y_dilation + (z) * G0__tensor_dim1 + (1) * G0__tensor_dim1 * G0__tensor_dim2)), tile_1);
+write_imageh(G0__tensor_img2d, (int2)((G0__x) >> 2, (((int)(0)) + 0 * G0__y_dilation + (G0__z) * G0__tensor_dim1 + (((int)(1))) * G0__tensor_dim1 * G0__tensor_dim2)), G0__tile__0);
+write_imageh(G0__tensor_img2d, (int2)((G0__x) >> 2, (((int)(0)) + 1 * G0__y_dilation + (G0__z) * G0__tensor_dim1 + (((int)(1))) * G0__tensor_dim1 * G0__tensor_dim2)), G0__tile__1);
)_";
// Configs Bundled
@@ -237,6 +237,18 @@ write_imageh(G0__tensor_img2d, (int2)((x) >> 2, (0 + 1 * y_dilation + (z) * G0__
};
}
+ TileOperand declare_tile_helper(KernelWriter &writer, std::string tile) // Test helper: declares an Int32 operand on `writer` from the textual spec `tile` and returns it.
+ {
+ if(tile == "0" || tile == "1") // The literals "0" and "1" are treated as constant values rather than tile names.
+ {
+ return writer.declare_constant_tile(ConstantData({{std::stoi(tile)}}, DataType::Int32)); // Constant tile built from the single parsed integer.
+ }
+ else
+ {
+ return writer.declare_tile(tile, TileInfo(DataType::Int32)); // Any other string is used as the name of a regular Int32 tile.
+ }
+ }
+
bool run() override
{
bool all_tests_passed = true;
@@ -255,12 +267,12 @@ write_imageh(G0__tensor_img2d, (int2)((x) >> 2, (0 + 1 * y_dilation + (z) * G0__
const std::string expected_code = std::get<6>(_config).substr(1); // ignore initial newline, which was added for convenience
TileOperand tile_op = writer.declare_tile("tile", tile_info);
- TileOperand x_op = writer.declare_tile(coord.x, TileInfo(DataType::Int32));
- TileOperand y_op = writer.declare_tile(coord.y, TileInfo(DataType::Int32));
- TileOperand z_op = writer.declare_tile(coord.z, TileInfo(DataType::Int32));
- TileOperand batch_op = writer.declare_tile(coord.batch, TileInfo(DataType::Int32));
- TileOperand dil_x_op = writer.declare_tile(dilations.dilation_x, TileInfo(DataType::Int32));
- TileOperand dil_y_op = writer.declare_tile(dilations.dilation_y, TileInfo(DataType::Int32));
+ TileOperand x_op = declare_tile_helper(writer, coord.x);
+ TileOperand y_op = declare_tile_helper(writer, coord.y);
+ TileOperand z_op = declare_tile_helper(writer, coord.z);
+ TileOperand batch_op = declare_tile_helper(writer, coord.batch);
+ TileOperand dil_x_op = declare_tile_helper(writer, dilations.dilation_x);
+ TileOperand dil_y_op = declare_tile_helper(writer, dilations.dilation_y);
TensorShape tensor_shape {10, 10, 10, 10};
TensorInfo tensor_info(tile_info.data_type(), tensor_shape, TensorDataLayout::Nhwc, 0 /* id */);