From a1422fbf985c89ffebc8f5af8093e9cd987cfe29 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Wed, 24 Oct 2018 12:20:19 +0100 Subject: COMPMID-1673: Collapse window in CLArithmeticAddition when one operand is a vector When one of the operands is a vector, the kernel does a broadcast addition and the window is not collapsed. This represents an issue because it leads to a lot of enqueues, which increases the time taken by the OpenCL driver. This patch allows the window to be collapsed when one of the two operands is a vector. Furthermore, it adds an LWS tuner to the kernel. It also changes the number of elements processed per iteration to 8 to make better use of the cache. Change-Id: I5f09ab0ddcffb3b7f9326a987c79a997b2d7fa8c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/155003 Reviewed-by: Giuseppe Rossini Tested-by: bsgcomp --- tests/validation/CL/ArithmeticAddition.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tests/validation/CL/ArithmeticAddition.cpp') diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp index e327769397..09f1b7c5a9 100644 --- a/tests/validation/CL/ArithmeticAddition.cpp +++ b/tests/validation/CL/ArithmeticAddition.cpp @@ -43,6 +43,7 @@ namespace validation { namespace { +constexpr unsigned int num_elems_processed_per_iteration = 8; /** Input data sets **/ const auto ArithmeticAdditionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType", DataType::U8)); @@ -110,7 +111,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da validate(dst.info()->valid_region(), valid_region); // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding(); validate(ref_src1.info()->padding(), 
padding); validate(ref_src2.info()->padding(), padding); validate(dst.info()->padding(), padding); @@ -146,7 +147,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da validate(dst.info()->valid_region(), valid_region); // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding(); validate(ref_src1.info()->padding(), padding); validate(ref_src2.info()->padding(), padding); validate(dst.info()->padding(), padding); @@ -186,7 +187,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(frame validate(dst.info()->valid_region(), valid_region); // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding(); validate(ref_src1.info()->padding(), padding); validate(ref_src2.info()->padding(), padding); validate(dst.info()->padding(), padding); @@ -235,7 +236,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da validate(dst.info()->valid_region(), valid_region); // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding(); validate(ref_src1.info()->padding(), padding); validate(ref_src2.info()->padding(), padding); validate(dst.info()->padding(), padding); -- cgit v1.2.1