10 files changed, 56 insertions, 35 deletions
diff --git a/src/core/CL/cl_kernels/scale.cl b/src/core/CL/cl_kernels/scale.cl
index b3398bd11c..0106ce095c 100644
--- a/src/core/CL/cl_kernels/scale.cl
+++ b/src/core/CL/cl_kernels/scale.cl
@@ -70,20 +70,20 @@ inline const float8 transform_bilinear(const float2 coord, const float2 scale)
  * @param[in]  out_offset_first_element_in_bytes The offset of the first element in the destination image
  * @param[in]  input_width                       Input image width
  * @param[in]  input_height                      Input image height
- * @param[in]  output_width                      Output image width
- * @param[in]  output_height                     Output image height
+ * @param[in]  scale_x                           Scale factor along the x dimension (input width / output width)
+ * @param[in]  scale_y                           Scale factor along the y dimension (input height / output height)
  */
 __kernel void scale_nearest_neighbour(
     IMAGE_DECLARATION(in),
     IMAGE_DECLARATION(out),
     const float input_width,
     const float input_height,
-    const float output_width,
-    const float output_height)
+    const float scale_x,
+    const float scale_y)
 {
     Image        in  = CONVERT_TO_IMAGE_STRUCT_NO_STEP(in);
     Image        out = CONVERT_TO_IMAGE_STRUCT(out);
-    const float2 r   = (float2)(input_width / output_width, input_height / output_height);
+    const float2 r   = (float2)(scale_x, scale_y);
     const float8 tc  = clamp_to_border(transform_nearest(get_current_coords(), r), input_width, input_height);
     vstore4(read_texels4(&in, convert_int8(tc)), 0, (__global DATA_TYPE *)out.ptr);
 }
@@ -104,20 +104,20 @@ __kernel void scale_nearest_neighbour(
  * @param[in]  out_offset_first_element_in_bytes The offset of the first element in the destination image
  * @param[in]  input_width                       Input image width
  * @param[in]  input_height                      Input image height
- * @param[in]  output_width                      Output image width
- * @param[in]  output_height                     Output image height
+ * @param[in]  scale_x                           Scale factor along the x dimension (input width / output width)
+ * @param[in]  scale_y                           Scale factor along the y dimension (input height / output height)
  */
 __kernel void scale_bilinear(
     IMAGE_DECLARATION(in),
     IMAGE_DECLARATION(out),
     const float input_width,
     const float input_height,
-    const float output_width,
-    const float output_height)
+    const float scale_x,
+    const float scale_y)
 {
     Image        in  = CONVERT_TO_IMAGE_STRUCT_NO_STEP(in);
     Image        out = CONVERT_TO_IMAGE_STRUCT(out);
-    const float2 r   = (float2)(input_width / output_width, input_height / output_height);
+    const float2 r   = (float2)(scale_x, scale_y);
     const float8 tc  = transform_bilinear(get_current_coords(), r);
     vstore4(bilinear_interpolate(&in, tc, input_width, input_height), 0, (__global DATA_TYPE *)out.ptr);
 }
diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp
index 66afc3db60..82ebe644ea 100644
--- a/src/core/CL/kernels/CLScaleKernel.cpp
+++ b/src/core/CL/kernels/CLScaleKernel.cpp
@@ -98,9 +98,12 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, Interpo
     ICLKernel::configure(win);
 
     // Set static kernel arguments
+    const float scale_x = static_cast<float>(input->info()->dimension(0)) / output->info()->dimension(0);
+    const float scale_y = static_cast<float>(input->info()->dimension(1)) / output->info()->dimension(1);
+
     unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
     _kernel.setArg<float>(idx++, input->info()->dimension(0));
     _kernel.setArg<float>(idx++, input->info()->dimension(1));
-    _kernel.setArg<float>(idx++, output->info()->dimension(0));
-    _kernel.setArg<float>(idx++, output->info()->dimension(1));
+    _kernel.setArg<float>(idx++, scale_x);
+    _kernel.setArg<float>(idx++, scale_y);
 }
diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h
index aa653ace60..6b3b5c748f 100644
--- a/tests/datasets/ShapeDatasets.h
+++ b/tests/datasets/ShapeDatasets.h
@@ -114,6 +114,27 @@ public:
     }
 };
 
+/** Data set containing medium tensor shapes. */
+class MediumShapes final : public ShapeDataset
+{
+public:
+    MediumShapes()
+        : ShapeDataset("Shape",
+    {
+        // Batch size 1
+        TensorShape{ 37U, 37U },
+                     TensorShape{ 27U, 33U, 2U },
+                     TensorShape{ 128U, 64U, 1U, 3U },
+                     // Batch size 4
+                     TensorShape{ 37U, 37U, 3U, 4U },
+                     TensorShape{ 27U, 33U, 2U, 4U },
+                     // Arbitrary batch size
+                     TensorShape{ 37U, 37U, 3U, 5U }
+    })
+    {
+    }
+};
+
 /** Data set containing large tensor shapes. */
 class LargeShapes final : public ShapeDataset
 {
@@ -185,17 +206,13 @@ public:
         : ShapeDataset("InputShape",
     {
         // Batch size 1
-        TensorShape{ 5U, 5U, 3U },
+        TensorShape{ 35U, 35U, 3U },
                      TensorShape{ 32U, 37U, 3U },
-                     TensorShape{ 13U, 15U, 8U },
                      // Batch size 4
-                     TensorShape{ 5U, 5U, 3U, 4U },
                      TensorShape{ 32U, 37U, 3U, 4U },
-                     TensorShape{ 13U, 15U, 8U, 4U },
                      // Batch size 8
-                     TensorShape{ 5U, 5U, 3U, 8U },
                      TensorShape{ 32U, 37U, 3U, 8U },
-                     TensorShape{ 13U, 15U, 8U, 8U },
+                     TensorShape{ 33U, 35U, 8U, 8U },
                      // Arbitrary batch size
                      TensorShape{ 32U, 37U, 3U, 8U }
     })
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index b2fd22eaee..a6e07248aa 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -43,10 +43,10 @@ namespace validation
 {
 namespace
 {
-RelativeTolerance<float>           tolerance_f32(0.001f);    /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-RelativeTolerance<half>            tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
-constexpr AbsoluteTolerance<float> tolerance_q(1.0f);        /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
-constexpr float                    tolerance_num = 0.07f;    /**< Tolerance number */
+RelativeTolerance<float>            tolerance_f32(0.05f);                 /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr AbsoluteTolerance<float>  tolerance_q(1.0f);                    /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
+constexpr float                     tolerance_num = 0.07f;                /**< Tolerance number */
 
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
diff --git a/tests/validation/CL/FullyConnectedLayer.cpp b/tests/validation/CL/FullyConnectedLayer.cpp
index 22f27e56dd..35b9d2938b 100644
--- a/tests/validation/CL/FullyConnectedLayer.cpp
+++ b/tests/validation/CL/FullyConnectedLayer.cpp
@@ -43,9 +43,9 @@ namespace validation
 namespace
 {
 /** Tolerance for float operations */
-RelativeTolerance<float> tolerance_f32(0.001f);
-RelativeTolerance<half>  tolerance_f16(half(0.2));
-constexpr float          tolerance_num = 0.07f; /**< Tolerance number */
+RelativeTolerance<float>            tolerance_f32(0.05f);
+RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.2));
+constexpr float                     tolerance_num = 0.07f; /**< Tolerance number */
 
 /** Tolerance for fixed point operations */
 constexpr AbsoluteTolerance<float> tolerance_fixed_point(1.f);
diff --git a/tests/validation/CL/GEMM.cpp b/tests/validation/CL/GEMM.cpp
index 854551917e..62671e34d7 100644
--- a/tests/validation/CL/GEMM.cpp
+++ b/tests/validation/CL/GEMM.cpp
@@ -43,9 +43,10 @@ namespace validation
 {
 namespace
 {
-RelativeTolerance<float>           tolerance_f32(0.001f);    /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
-RelativeTolerance<half>            tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
-constexpr AbsoluteTolerance<float> tolerance_q(1.0f);        /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
+RelativeTolerance<float>            tolerance_f32(0.001f);                /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr AbsoluteTolerance<float>  tolerance_q(1.0f);                    /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
+constexpr float                     tolerance_num = 0.02f;                /**< Tolerance number passed to validate() together with tolerance_f16 in the FP16 tests */
 
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
@@ -92,13 +93,13 @@ TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), framework::dataset::make("DataType", DataType::F16)))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16);
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMDataset(), framework::dataset::make("DataType",
                                                                                                DataType::F16)))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16);
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
 TEST_SUITE_END()
 
diff --git a/tests/validation/CL/Scale.cpp b/tests/validation/CL/Scale.cpp
index 6757bd5ee3..1a458b7a08 100644
--- a/tests/validation/CL/Scale.cpp
+++ b/tests/validation/CL/Scale.cpp
@@ -57,14 +57,14 @@ const auto ScaleDataTypes = framework::dataset::make("DataType",
 /** Tolerance */
 constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1);
 constexpr AbsoluteTolerance<int16_t> tolerance_s16(1);
-RelativeTolerance<float>             tolerance_f32(0.01);
+RelativeTolerance<float>             tolerance_f32(0.05);
 RelativeTolerance<half>              tolerance_f16(half(0.1));
 } // namespace
 
 TEST_SUITE(CL)
 TEST_SUITE(Scale)
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), ScaleDataTypes),
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::MediumShapes(), datasets::LargeShapes()), ScaleDataTypes),
                                                                            framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
                                                                    datasets::BorderModes()),
                shape, data_type, policy, border_mode)
diff --git a/tests/validation/CPP/Scale.cpp b/tests/validation/CPP/Scale.cpp
index ba34553a99..74489aaa96 100644
--- a/tests/validation/CPP/Scale.cpp
+++ b/tests/validation/CPP/Scale.cpp
@@ -166,4 +166,4 @@ template SimpleTensor<float> scale(const SimpleTensor<float> &src, float scale_x
 } // namespace reference
 } // namespace validation
 } // namespace test
-} // namespace arm_compute
-\ No newline at end of file
+} // namespace arm_compute
diff --git a/tests/validation/Validation.h b/tests/validation/Validation.h
index e461633944..7d4ce57250 100644
--- a/tests/validation/Validation.h
+++ b/tests/validation/Validation.h
@@ -269,7 +269,7 @@ struct compare<RelativeTolerance<U>> : public compare_base<RelativeTolerance<U>>
             return true;
         }
 
-        const U epsilon = (std::is_same<half, typename std::remove_cv<U>::type>::value || (this->_reference == 0)) ? static_cast<U>(0.01) : std::numeric_limits<U>::epsilon();
+        const U epsilon = (std::is_same<half, typename std::remove_cv<U>::type>::value || (this->_reference == 0)) ? static_cast<U>(0.01) : static_cast<U>(1e-06);
 
         if(std::abs(static_cast<double>(this->_reference) - static_cast<double>(this->_target)) <= epsilon)
         {
diff --git a/tests/validation/fixtures/ScaleFixture.h b/tests/validation/fixtures/ScaleFixture.h
index ba252fbdc5..6fa810aa96 100644
--- a/tests/validation/fixtures/ScaleFixture.h
+++ b/tests/validation/fixtures/ScaleFixture.h
@@ -124,4 +124,4 @@ protected:
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_SCALE_FIXTURE */
-\ No newline at end of file
+#endif /* ARM_COMPUTE_TEST_SCALE_FIXTURE */