From 362e1e07b958e649af5aa459babe8b309cda45d7 Mon Sep 17 00:00:00 2001
From: Gunes Bayir
Date: Wed, 23 Aug 2023 23:56:54 +0100
Subject: Fix load/store tests in CKW

The tests were disabled because some of them require constants and
constant tile declaration was not yet available.

Partially Resolves: COMPMID-5791, COMPMID-6389
Signed-off-by: Gunes Bayir
Change-Id: Icf8a901e9b552207bff1366955b88ec46d47bf04
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10211
Benchmark: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Viet-Hoa Do
Comments-Addressed: Arm Jenkins
---
 compute_kernel_writer/src/cl/CLKernelWriter.cpp | 10 +--
 compute_kernel_writer/validation/Validation.cpp |  2 +-
 .../tests/CLKernelWriterOpLoadStoreTest.h       | 84 ++++++++++++----------
 3 files changed, 54 insertions(+), 42 deletions(-)

(limited to 'compute_kernel_writer')

diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.cpp b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
index 90707ccbb1..c64b7415ff 100644
--- a/compute_kernel_writer/src/cl/CLKernelWriter.cpp
+++ b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
@@ -525,8 +525,8 @@ const CLTile &CLKernelWriter::to_cl_tile(const TileOperand &operand) const
 void CLKernelWriter::op_load(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
                              const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
 {
-    const CLTile dilation_x("1", DataType::Int32);
-    const CLTile dilation_y("1", DataType::Int32);
+    const CLTile dilation_x({{"1"}}, DataType::Int32);
+    const CLTile dilation_y({{"1"}}, DataType::Int32);
 
     op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y);
 }
@@ -544,8 +544,8 @@ void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOpe
 void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
                               const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
 {
-    const CLTile dilation_x("1", DataType::Int32);
-    const CLTile dilation_y("1", DataType::Int32);
+    const CLTile dilation_x({{"1"}}, DataType::Int32);
+    const CLTile dilation_y({{"1"}}, DataType::Int32);
 
     op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y);
 }
@@ -565,7 +565,7 @@ void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_o
                                    const CLTile &dilation_x, const CLTile &dilation_y)
 {
     CKW_UNUSED(dilation_x);
-    CKW_ASSERT(dilation_x.scalar(0, 0).str == "1"); // Dilation in x dimension is not implemented yet
+    CKW_ASSERT(dilation_x.scalar(0, 0).str == "((int)(1))"); // Dilation in x dimension is not implemented yet
 
     ITensor &tensor = get_tensor(tensor_op);
 
diff --git a/compute_kernel_writer/validation/Validation.cpp b/compute_kernel_writer/validation/Validation.cpp
index 6425d25f2b..c8d0f6b45d 100644
--- a/compute_kernel_writer/validation/Validation.cpp
+++ b/compute_kernel_writer/validation/Validation.cpp
@@ -120,7 +120,7 @@ int32_t main()
     tests.push_back(test22.get());
     tests.push_back(test23.get());
     tests.push_back(test24.get());
-    CKW_UNUSED(test25); // CLKernelWriterOpLoadStoreTest test needs further changes.
+    tests.push_back(test25.get());
     tests.push_back(test26.get());
     tests.push_back(test27.get());
     tests.push_back(test28.get());
diff --git a/compute_kernel_writer/validation/tests/CLKernelWriterOpLoadStoreTest.h b/compute_kernel_writer/validation/tests/CLKernelWriterOpLoadStoreTest.h
index 0f4afc8bf3..5702f19ce5 100644
--- a/compute_kernel_writer/validation/tests/CLKernelWriterOpLoadStoreTest.h
+++ b/compute_kernel_writer/validation/tests/CLKernelWriterOpLoadStoreTest.h
@@ -93,79 +93,79 @@ public:
     {
         // Cases
         const std::string load_fp_2x3_tile = R"_(
-tile_0 = vload3(0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (y + 0) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (b) * G0__tensor_stride3));
-tile_1 = vload3(0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (y + 1) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (b) * G0__tensor_stride3));
+G0__tile__0 = vload3(0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (G0__y + 0 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (G0__b) * G0__tensor_stride3));
+G0__tile__1 = vload3(0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (G0__y + 1 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (G0__b) * G0__tensor_stride3));
 )_";
         const std::string load_half_2x4_tile_image_clamp_y = R"_(
-tile_0 = read_imageh(G0__tensor_img2d, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST, (int2)((x) >> 2, (y + 0 + (z) * G0__tensor_dim1 + (b) * G0__tensor_dim1 * G0__tensor_dim2)));
-tile_1 = read_imageh(G0__tensor_img2d, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST, (int2)((x) >> 2, (y + 1 + (z) * G0__tensor_dim1 + (b) * G0__tensor_dim1 * G0__tensor_dim2)));
+G0__tile__0 = read_imageh(G0__tensor_img2d, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST, (int2)((G0__x) >> 2, (G0__y + 0 * ((int)(1)) + (G0__z) * G0__tensor_dim1 + (G0__b) * G0__tensor_dim1 * G0__tensor_dim2)));
+G0__tile__1 = read_imageh(G0__tensor_img2d, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST, (int2)((G0__x) >> 2, (G0__y + 1 * ((int)(1)) + (G0__z) * G0__tensor_dim1 + (G0__b) * G0__tensor_dim1 * G0__tensor_dim2)));
 )_";
         const std::string store_fp_2x3_tile = R"_(
-vstore3(tile_0, 0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (y + 0) * G0__tensor_stride1 + (b) * G0__tensor_stride3));
-vstore3(tile_1, 0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (y + 1) * G0__tensor_stride1 + (b) * G0__tensor_stride3));
+vstore3(G0__tile__0, 0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (G0__y + 0 * ((int)(1))) * G0__tensor_stride1 + (G0__b) * G0__tensor_stride3));
+vstore3(G0__tile__1, 0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (G0__y + 1 * ((int)(1))) * G0__tensor_stride1 + (G0__b) * G0__tensor_stride3));
 )_";
        const std::string store_int8_4x4_y_dilation_batch_eq_0 = R"_(
-vstore4(tile_0, 0, (__global char*)(G0__tensor_ptr + (1) * sizeof(char) + (y + 0 * y_dilation) * G0__tensor_stride1 + (z) * G0__tensor_stride2));
-vstore4(tile_1, 0, (__global char*)(G0__tensor_ptr + (1) * sizeof(char) + (y + 1 * y_dilation) * G0__tensor_stride1 + (z) * G0__tensor_stride2));
-vstore4(tile_2, 0, (__global char*)(G0__tensor_ptr + (1) * sizeof(char) + (y + 2 * y_dilation) * G0__tensor_stride1 + (z) * G0__tensor_stride2));
-vstore4(tile_3, 0, (__global char*)(G0__tensor_ptr + (1) * sizeof(char) + (y + 3 * y_dilation) * G0__tensor_stride1 + (z) * G0__tensor_stride2));
+vstore4(G0__tile__0, 0, (__global char*)(G0__tensor_ptr + (((int)(1))) * sizeof(char) + (G0__y + 0 * G0__y_dilation) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(0))) * G0__tensor_stride3));
+vstore4(G0__tile__1, 0, (__global char*)(G0__tensor_ptr + (((int)(1))) * sizeof(char) + (G0__y + 1 * G0__y_dilation) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(0))) * G0__tensor_stride3));
+vstore4(G0__tile__2, 0, (__global char*)(G0__tensor_ptr + (((int)(1))) * sizeof(char) + (G0__y + 2 * G0__y_dilation) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(0))) * G0__tensor_stride3));
+vstore4(G0__tile__3, 0, (__global char*)(G0__tensor_ptr + (((int)(1))) * sizeof(char) + (G0__y + 3 * G0__y_dilation) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(0))) * G0__tensor_stride3));
 )_";
         // tensor dimension is 10
         const std::string load_fp_2x3_tile_x_overlapping_min_y_eq_0_batch_eq_1 = R"_(
-if(x > 0)
+if(G0__x > 0)
 {
-tile_0 = vload3(0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (0 + 0) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (1) * G0__tensor_stride3));
-tile_1 = vload3(0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (0 + 1) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (1) * G0__tensor_stride3));
+G0__tile__0 = vload3(0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (((int)(0)) + 0 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(1))) * G0__tensor_stride3));
+G0__tile__1 = vload3(0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (((int)(0)) + 1 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(1))) * G0__tensor_stride3));
 }
 else
 {
-tile_0.s0 = *((__global float*)(G0__tensor_ptr + (x + 0) * sizeof(float) + (0 + 0) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (1) * G0__tensor_stride3));
-tile_1.s0 = *((__global float*)(G0__tensor_ptr + (x + 0) * sizeof(float) + (0 + 1) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (1) * G0__tensor_stride3));
+G0__tile__0.s0 = *((__global float*)(G0__tensor_ptr + (G0__x + 0) * sizeof(float) + (((int)(0)) + 0 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(1))) * G0__tensor_stride3));
+G0__tile__1.s0 = *((__global float*)(G0__tensor_ptr + (G0__x + 0) * sizeof(float) + (((int)(0)) + 1 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (((int)(1))) * G0__tensor_stride3));
 }
 )_";
         const std::string store_fp_2x3_tile_x_overlapping_min_y_clamp_to_border_max_only = R"_(
-if(x > 0)
+if(G0__x > 0)
 {
-if(y + 0 < G0__tensor_dim1)
+if(G0__y + 0 * ((int)(1)) < G0__tensor_dim1)
 {
-vstore3(tile_0, 0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (y + 0) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (b) * G0__tensor_stride3));
+vstore3(G0__tile__0, 0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (G0__y + 0 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (G0__b) * G0__tensor_stride3));
 }
 else
 {
-tile_0 = 0.0f;
+G0__tile__0 = 0.0f;
 }
-if(y + 1 < G0__tensor_dim1)
+if(G0__y + 1 * ((int)(1)) < G0__tensor_dim1)
 {
-vstore3(tile_1, 0, (__global float*)(G0__tensor_ptr + (x) * sizeof(float) + (y + 1) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (b) * G0__tensor_stride3));
+vstore3(G0__tile__1, 0, (__global float*)(G0__tensor_ptr + (G0__x) * sizeof(float) + (G0__y + 1 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (G0__b) * G0__tensor_stride3));
 }
 else
 {
-tile_1 = 0.0f;
+G0__tile__1 = 0.0f;
 }
 }
 else
 {
-if(y + 0 < G0__tensor_dim1)
+if(G0__y + 0 * ((int)(1)) < G0__tensor_dim1)
 {
-*((__global float*)(G0__tensor_ptr + (x + 0) * sizeof(float) + (y + 0) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (b) * G0__tensor_stride3)) = tile_0.s0;
+*((__global float*)(G0__tensor_ptr + (G0__x + 0) * sizeof(float) + (G0__y + 0 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (G0__b) * G0__tensor_stride3)) = G0__tile__0.s0;
 }
 else
 {
-tile_0.s0 = 0.0f;
+G0__tile__0.s0 = 0.0f;
 }
-if(y + 1 < G0__tensor_dim1)
+if(G0__y + 1 * ((int)(1)) < G0__tensor_dim1)
 {
-*((__global float*)(G0__tensor_ptr + (x + 0) * sizeof(float) + (y + 1) * G0__tensor_stride1 + (z) * G0__tensor_stride2 + (b) * G0__tensor_stride3)) = tile_1.s0;
+*((__global float*)(G0__tensor_ptr + (G0__x + 0) * sizeof(float) + (G0__y + 1 * ((int)(1))) * G0__tensor_stride1 + (G0__z) * G0__tensor_stride2 + (G0__b) * G0__tensor_stride3)) = G0__tile__1.s0;
 }
 else
 {
-tile_1.s0 = 0.0f;
+G0__tile__1.s0 = 0.0f;
 }
 }
 )_";
         const std::string store_half_2x4_tile_x_image_y_dilation = R"_(
-write_imageh(G0__tensor_img2d, (int2)((x) >> 2, (0 + 0 * y_dilation + (z) * G0__tensor_dim1 + (1) * G0__tensor_dim1 * G0__tensor_dim2)), tile_0);
-write_imageh(G0__tensor_img2d, (int2)((x) >> 2, (0 + 1 * y_dilation + (z) * G0__tensor_dim1 + (1) * G0__tensor_dim1 * G0__tensor_dim2)), tile_1);
+write_imageh(G0__tensor_img2d, (int2)((G0__x) >> 2, (((int)(0)) + 0 * G0__y_dilation + (G0__z) * G0__tensor_dim1 + (((int)(1))) * G0__tensor_dim1 * G0__tensor_dim2)), G0__tile__0);
+write_imageh(G0__tensor_img2d, (int2)((G0__x) >> 2, (((int)(0)) + 1 * G0__y_dilation + (G0__z) * G0__tensor_dim1 + (((int)(1))) * G0__tensor_dim1 * G0__tensor_dim2)), G0__tile__1);
 )_";
 
         // Configs Bundled
@@ -237,6 +237,18 @@ write_imageh(G0__tensor_img2d, (int2)((x) >> 2, (0 + 1 * y_dilation + (z) * G0__
         };
     }
 
+    TileOperand declare_tile_helper(KernelWriter &writer, std::string tile)
+    {
+        if(tile == "0" || tile == "1")
+        {
+            return writer.declare_constant_tile(ConstantData({{std::stoi(tile)}}, DataType::Int32));
+        }
+        else
+        {
+            return writer.declare_tile(tile, TileInfo(DataType::Int32));
+        }
+    }
+
     bool run() override
     {
         bool all_tests_passed = true;
@@ -255,12 +267,12 @@ write_imageh(G0__tensor_img2d, (int2)((x) >> 2, (0 + 1 * y_dilation + (z) * G0__
             const std::string expected_code = std::get<6>(_config).substr(1); // ignore initial newline, which was added for convenience
 
             TileOperand tile_op  = writer.declare_tile("tile", tile_info);
-            TileOperand x_op     = writer.declare_tile(coord.x, TileInfo(DataType::Int32));
-            TileOperand y_op     = writer.declare_tile(coord.y, TileInfo(DataType::Int32));
-            TileOperand z_op     = writer.declare_tile(coord.z, TileInfo(DataType::Int32));
-            TileOperand batch_op = writer.declare_tile(coord.batch, TileInfo(DataType::Int32));
-            TileOperand dil_x_op = writer.declare_tile(dilations.dilation_x, TileInfo(DataType::Int32));
-            TileOperand dil_y_op = writer.declare_tile(dilations.dilation_y, TileInfo(DataType::Int32));
+            TileOperand x_op     = declare_tile_helper(writer, coord.x);
+            TileOperand y_op     = declare_tile_helper(writer, coord.y);
+            TileOperand z_op     = declare_tile_helper(writer, coord.z);
+            TileOperand batch_op = declare_tile_helper(writer, coord.batch);
+            TileOperand dil_x_op = declare_tile_helper(writer, dilations.dilation_x);
+            TileOperand dil_y_op = declare_tile_helper(writer, dilations.dilation_y);
 
             TensorShape tensor_shape {10, 10, 10, 10};
             TensorInfo tensor_info(tile_info.data_type(), tensor_shape, TensorDataLayout::Nhwc, 0 /* id */);
-- 
cgit v1.2.1
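
Usage sketch (not part of the patch): the declare_tile_helper() added above routes the literal coordinate strings "0" and "1" to constant tiles and every other coordinate to a regular variable tile. A standalone version of that pattern could look as follows; the function name make_coordinate, the ckw namespace, and the include paths are assumptions based on the CKW source layout, while the declare_constant_tile/declare_tile calls themselves are the ones used in the patch:

// Sketch only: header locations are assumed and may differ between CKW versions.
#include "ckw/KernelWriter.h"
#include "ckw/TileInfo.h"
#include "ckw/TileOperand.h"
#include "ckw/types/ConstantData.h"
#include "ckw/types/DataType.h"

#include <string>

using namespace ckw;

// "0" and "1" become constant tiles, which the CL writer renders as
// ((int)(0)) / ((int)(1)); any other string becomes a named variable tile,
// rendered with the G0__ prefix (e.g. "y" -> G0__y).
TileOperand make_coordinate(KernelWriter &writer, const std::string &coord)
{
    if(coord == "0" || coord == "1")
    {
        return writer.declare_constant_tile(ConstantData({{std::stoi(coord)}}, DataType::Int32));
    }
    return writer.declare_tile(coord, TileInfo(DataType::Int32));
}

This is the distinction visible in the updated expected strings above: a literal batch of 1 shows up as (((int)(1))) inside the generated address computation, while a variable coordinate such as y keeps its G0__y name.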