aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPablo Tello <pablo.tello@arm.com>2018-10-03 17:11:09 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:55:19 +0000
commitd041a835041159a0a6744fc271df15e9f66167bc (patch)
tree895f01d68218cbeabf639ef027d519fa4c96d655
parentecd9d09c7c77005586250587ec8e1ddb6f224bde (diff)
downloadComputeLibrary-d041a835041159a0a6744fc271df15e9f66167bc.tar.gz
COMPMID-1610: Fixed CLDirectConvolution mismatches
Kernel size 5x5 layout NHWC. Change-Id: Ia82ff211d1c954df228962b5c2c5ad8df7112449 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/151740 Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com> Tested-by: bsgcomp <bsgcomp@arm.com>
-rw-r--r--src/core/CL/cl_kernels/direct_convolution5x5.cl62
-rw-r--r--src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp3
-rw-r--r--tests/validation/CL/DirectConvolutionLayer.cpp24
3 files changed, 68 insertions, 21 deletions
diff --git a/src/core/CL/cl_kernels/direct_convolution5x5.cl b/src/core/CL/cl_kernels/direct_convolution5x5.cl
index 70be058854..5299409243 100644
--- a/src/core/CL/cl_kernels/direct_convolution5x5.cl
+++ b/src/core/CL/cl_kernels/direct_convolution5x5.cl
@@ -194,11 +194,11 @@ __kernel void direct_convolution5x5_nhwc(
__global uchar *src_addr = (__global uchar *)offset(&src, 0, 0) - src_stride_x * id0 + ((id2 * STRIDE_Y) - PAD_TOP) * (int)src_stride_z;
weights_addr += id0 * weights_stride_w;
- const int coordy = id2 - PAD_TOP;
+#if(PAD_TOP == 1)
+ const int coordy = id2 - PAD_TOP;
for(volatile int d = 0; d < WEIGHTS_DEPTH; ++d)
{
-#if(PAD_TOP)
if(coordy < 0) // special case Z = -1 doesn't exists
{
//skip first row and load the two next ones
@@ -224,17 +224,69 @@ __kernel void direct_convolution5x5_nhwc(
CONVOLUTION1x5_NHWC(values0, (src_addr + 3 * (int)src_stride_z), (weights_addr + 3 * (int)weights_stride_z));
CONVOLUTION1x5_NHWC(values0, (src_addr + 4 * (int)src_stride_z), (weights_addr + 4 * (int)weights_stride_z));
}
-#else //PAD_TOP > 0
+ src_addr += src_stride_x;
+ weights_addr += weights_stride_x;
+ }
+#elif(PAD_TOP == 2)
+ const int coordy = id2 * STRIDE_Y;
+ for(volatile int d = 0; d < WEIGHTS_DEPTH; ++d)
+ {
+ if(coordy == 0) // special case Z = -2 doesn't exists
+ {
+ //skip first row and load the two next ones
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 2 * (int)src_stride_z), (weights_addr + 2 * (int)weights_stride_z));
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 3 * (int)src_stride_z), (weights_addr + 3 * (int)weights_stride_z));
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 4 * (int)src_stride_z), (weights_addr + 4 * (int)weights_stride_z));
+ }
+ else if(coordy == 1) // special case Z = -1 doesn't exists
+ {
+ //skip first row and load the two next ones
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 1 * (int)src_stride_z), (weights_addr + 1 * (int)weights_stride_z));
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 2 * (int)src_stride_z), (weights_addr + 2 * (int)weights_stride_z));
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 3 * (int)src_stride_z), (weights_addr + 3 * (int)weights_stride_z));
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 4 * (int)src_stride_z), (weights_addr + 4 * (int)weights_stride_z));
+ }
+ else if(coordy == (SRC_HEIGHT - 1))
+ {
+ // special case when computing the last row of the output we must read the last three rows from the input buffer (including padding) but the
+ // Z axis has no padding at all.
+ CONVOLUTION1x5_NHWC(values0, src_addr, weights_addr);
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 1 * (int)src_stride_z), (weights_addr + 1 * (int)weights_stride_z));
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 2 * (int)src_stride_z), (weights_addr + 2 * (int)weights_stride_z));
+ }
+ else if(coordy == (SRC_HEIGHT - 2))
+ {
+ // special case when computing the last row of the output we must read the last three rows from the input buffer (including padding) but the
+ // Z axis has no padding at all.
+ CONVOLUTION1x5_NHWC(values0, src_addr, weights_addr);
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 1 * (int)src_stride_z), (weights_addr + 1 * (int)weights_stride_z));
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 2 * (int)src_stride_z), (weights_addr + 2 * (int)weights_stride_z));
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 3 * (int)src_stride_z), (weights_addr + 3 * (int)weights_stride_z));
+ }
+ else
+ {
+ CONVOLUTION1x5_NHWC(values0, src_addr, weights_addr);
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 1 * (int)src_stride_z), (weights_addr + 1 * (int)weights_stride_z));
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 2 * (int)src_stride_z), (weights_addr + 2 * (int)weights_stride_z));
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 3 * (int)src_stride_z), (weights_addr + 3 * (int)weights_stride_z));
+ CONVOLUTION1x5_NHWC(values0, (src_addr + 4 * (int)src_stride_z), (weights_addr + 4 * (int)weights_stride_z));
+ }
+ src_addr += src_stride_x;
+ weights_addr += weights_stride_x;
+ }
+
+#else /* PAD_TOP == 2 */
+ for(volatile int d = 0; d < WEIGHTS_DEPTH; ++d)
+ {
CONVOLUTION1x5_NHWC(values0, src_addr, weights_addr);
CONVOLUTION1x5_NHWC(values0, (src_addr + 1 * (int)src_stride_z), (weights_addr + 1 * (int)weights_stride_z));
CONVOLUTION1x5_NHWC(values0, (src_addr + 2 * (int)src_stride_z), (weights_addr + 2 * (int)weights_stride_z));
CONVOLUTION1x5_NHWC(values0, (src_addr + 3 * (int)src_stride_z), (weights_addr + 3 * (int)weights_stride_z));
CONVOLUTION1x5_NHWC(values0, (src_addr + 4 * (int)src_stride_z), (weights_addr + 4 * (int)weights_stride_z));
-#endif // PAD_TOP > 0
-
src_addr += src_stride_x;
weights_addr += weights_stride_x;
}
+#endif /* PAD_TOP == 1 */
#ifdef HAS_BIAS
Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index c3d514adb4..471b3209ac 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -421,8 +421,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
}
else
{
- bool is_quantized_asymm = is_data_type_quantized_asymmetric(data_type);
-
+ const bool is_quantized_asymm = is_data_type_quantized_asymmetric(data_type);
build_options.add_option_if(is_quantized_asymm, std::string("-DKERNEL_SIZE=" + support::cpp11::to_string(kernel_size)));
build_options.add_option(std::string("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)));
build_options.add_option(std::string("-DDATA_SIZE=" + get_data_size_from_data_type(data_type)));
diff --git a/tests/validation/CL/DirectConvolutionLayer.cpp b/tests/validation/CL/DirectConvolutionLayer.cpp
index e29ec7ef4c..9db73820c2 100644
--- a/tests/validation/CL/DirectConvolutionLayer.cpp
+++ b/tests/validation/CL/DirectConvolutionLayer.cpp
@@ -44,23 +44,20 @@ namespace validation
namespace
{
// COMPMID-517 Invesitgate the mismatch to see whether it is a real bug
-RelativeTolerance<half> tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */
-RelativeTolerance<float> tolerance_fp32(0.02f); /**< Tolerance for floating point tests */
-constexpr float tolerance_num = 0.07f; /**< Tolerance number */
+RelativeTolerance<half> tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */
+RelativeTolerance<float> tolerance_fp32(0.02f); /**< Tolerance for floating point tests */
+constexpr float tolerance_num = 0.07f; /**< Tolerance number */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance for quantized tests */
-constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance for quantized tests */
+const auto data_strides = combine(framework::dataset::make("StrideX", 1, 3), framework::dataset::make("StrideY", 1, 3));
+const auto data_ksize_one = combine(framework::dataset::make("PadX", 0, 1), combine(framework::dataset::make("PadY", 0, 1), framework::dataset::make("KernelSize", 1)));
+const auto data_ksize_three = combine(framework::dataset::make("PadX", 0, 2), combine(framework::dataset::make("PadY", 0, 2), framework::dataset::make("KernelSize", 3)));
+const auto data_ksize_five = combine(framework::dataset::make("PadX", 0, 3), combine(framework::dataset::make("PadY", 0, 3), framework::dataset::make("KernelSize", 5)));
+const auto data_all_kernels = concat(concat(data_ksize_one, data_ksize_three), data_ksize_five);
/** Direct convolution data set. */
const auto data = combine(datasets::SmallDirectConvolutionShapes(),
- combine(framework::dataset::make("StrideX", 1, 3),
- combine(framework::dataset::make("StrideY", 1, 3),
- combine(concat(combine(framework::dataset::make("PadX", 0, 1),
- combine(framework::dataset::make("PadY", 0, 1),
- framework::dataset::make("KernelSize", 1))),
- combine(framework::dataset::make("PadX", 0, 2),
- combine(framework::dataset::make("PadY", 0, 2),
- framework::dataset::make("KernelSize", { 3, 5 })))),
- framework::dataset::make("NumKernels", { 1, 4, 8, 16 })))));
+ combine(data_strides, combine(data_all_kernels, framework::dataset::make("NumKernels", { 1, 4, 8, 16 }))));
/** Activation function Dataset*/
const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
@@ -174,7 +171,6 @@ FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionLayerFixture<float>, framework::D
ActivationFunctionsDataset),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
- // Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
TEST_SUITE_END()