aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2018-09-12 13:35:38 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:54 +0000
commit17220e2eb49e75b85f2b802489a44b8019997c25 (patch)
treecd2ab40769e866283c4d708de1d723b6f1150f66
parentf1addb665ad668dcd34e18c52e4961a7cf5e3886 (diff)
downloadComputeLibrary-17220e2eb49e75b85f2b802489a44b8019997c25.tar.gz
COMPMID-1507 Add support for QASYMM8 in CLScaleKernel
Change-Id: I4a32e47e6d9152633668cf0e14db88fc8c26f7ea Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/148167 Tested-by: bsgcomp <bsgcomp@arm.com> Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
-rw-r--r--src/core/CL/CLKernelLibrary.cpp6
-rw-r--r--src/core/CL/cl_kernels/scale_quantized.cl169
-rw-r--r--src/core/CL/cl_kernels/warp_helpers_quantized.h138
-rw-r--r--src/core/CL/kernels/CLScaleKernel.cpp15
-rw-r--r--tests/validation/CL/Scale.cpp61
-rw-r--r--tests/validation/fixtures/ScaleFixture.h70
-rw-r--r--tests/validation/reference/Scale.cpp32
-rw-r--r--tests/validation/reference/Scale.h5
8 files changed, 460 insertions, 36 deletions
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 8f5e81bae9..392fbfefb0 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -356,6 +356,8 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "scale_nearest_neighbour_nhwc", "scale.cl" },
{ "scale_bilinear_nchw", "scale.cl" },
{ "scale_bilinear_nhwc", "scale.cl" },
+ { "scale_bilinear_quantized_nchw", "scale_quantized.cl" },
+ { "scale_bilinear_quantized_nhwc", "scale_quantized.cl" },
{ "scharr3x3", "scharr_filter.cl" },
{ "sobel3x3", "sobel_filter.cl" },
{ "sobel_separable5x1", "sobel_filter.cl" },
@@ -745,6 +747,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/scale.clembed"
},
{
+ "scale_quantized.cl",
+#include "./cl_kernels/scale_quantized.clembed"
+ },
+ {
"scharr_filter.cl",
#include "./cl_kernels/scharr_filter.clembed"
},
diff --git a/src/core/CL/cl_kernels/scale_quantized.cl b/src/core/CL/cl_kernels/scale_quantized.cl
new file mode 100644
index 0000000000..3211e7efa1
--- /dev/null
+++ b/src/core/CL/cl_kernels/scale_quantized.cl
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers_asymm.h"
+#include "warp_helpers_quantized.h"
+
+/** Transforms four 2D coordinates. This is used to map the output coordinates to the input coordinates.
+ *
+ * @param[in] coord 2D coordinates to transform.
+ * @param[in] scale input/output scale ratio
+ *
+ * @return a float8 containing 4 2D transformed values in the input image.
+ */
+inline const float8 transform_bilinear_quantized(const float2 coord, const float2 scale)
+{
+ const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
+#ifdef SAMPLING_POLICY_TOP_LEFT
+ const float4 new_x = in_x_coords * (float4)(scale.s0);
+ const float4 new_y = (float4)(coord.s1 * scale.s1);
+ return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
+#elif SAMPLING_POLICY_CENTER
+ const float4 new_x = (in_x_coords + ((float4)(0.5f))) * (float4)(scale.s0) - (float4)(0.5f);
+ const float4 new_y = (float4)((coord.s1 + 0.5f) * scale.s1 - 0.5f);
+ return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
+#else /* SAMPLING_POLICY */
+#error("Unsupported sampling policy");
+#endif /* SAMPLING_POLICY */
+}
+
+/** Performs an affine transformation on an image interpolating with the BILINEAR method.
+ *
+ * @note Sampling policy to used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT
+ * @note Scale value for QASYMM8 data type to used is passed as -DSCALE=<VALUE> e.g. -DSCALE=0.5
+ * @note Offset value for QASYMM8 data type to used is passed as -DOFFSET=<VALUE> e.g. -DOFFSET=1
+ *
+ * @param[in] in_ptr Pointer to the source image. Supported data types: QASYMM8.
+ * @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] in_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] in_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] in_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, S16. (Must be the same as the input)
+ * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
+ * @param[in] out_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
+ * @param[in] out_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
+ * @param[in] input_width Input image width
+ * @param[in] input_height Input image height
+ * @param[in] scale_x The scale factor along x dimension
+ * @param[in] scale_y The scale factor along y dimension
+ */
+__kernel void scale_bilinear_quantized_nchw(
+ IMAGE_DECLARATION(in),
+ IMAGE_DECLARATION(out),
+ const float input_width,
+ const float input_height,
+ const float scale_x,
+ const float scale_y)
+{
+ Image in = CONVERT_TO_IMAGE_STRUCT_NO_STEP(in);
+ Image out = CONVERT_TO_IMAGE_STRUCT(out);
+ const float2 r = (float2)(scale_x, scale_y);
+ const float8 tc = transform_bilinear_quantized(get_current_coords_quantized(), r);
+ vstore4(bilinear_interpolate_with_border_quantized(&in, tc, input_width, input_height, BORDER_SIZE, SCALE, OFFSET), 0, (__global DATA_TYPE *)out.ptr);
+}
+
+/** Performs scale on an image interpolating with the BILINEAR method. (NHWC)
+ *
+ * @note Sampling policy to be used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT
+ * @note Scale value for QASYMM8 data type to used is passed as -DSCALE=<VALUE> e.g. -DSCALE=0.5
+ * @note Offset value for QASYMM8 data type to used is passed as -DOFFSET=<VALUE> e.g. -DOFFSET=1
+ * @note If border mode replicate is used, is should be passed as -DBORDER_MODE_REPLICATE
+ *
+ * @param[in] in_ptr Pointer to the source image. Supported data types: QASYMM8.
+ * @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] in_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] in_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] in_stride_z Stride of the source image in Z dimension (in bytes)
+ * @param[in] in_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] in_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] out_ptr Pointer to the destination image. Supported data types: same as @p in_ptr
+ * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
+ * @param[in] out_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
+ * @param[in] out_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] out_stride_z Stride of the destination image in Z dimension (in bytes)
+ * @param[in] out_step_z dst_stride_y * number of elements along Z processed per workitem(in bytes)
+ * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
+ * @param[in] input_width Input image width
+ * @param[in] input_height Input image height
+ * @param[in] scale_x The scale factor along x dimension
+ * @param[in] scale_y The scale factor along y dimension
+ */
+__kernel void scale_bilinear_quantized_nhwc(
+ TENSOR3D_DECLARATION(in),
+ TENSOR3D_DECLARATION(out),
+ const float input_width,
+ const float input_height,
+ const float scale_x,
+ const float scale_y)
+{
+ Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(in);
+ Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
+
+#ifdef SAMPLING_POLICY_TOP_LEFT
+ const float new_x = get_global_id(1) * scale_x;
+ const float new_y = get_global_id(2) * scale_y;
+#elif SAMPLING_POLICY_CENTER
+ const float new_x = (get_global_id(1) + 0.5f) * scale_x - 0.5f;
+ const float new_y = (get_global_id(2) + 0.5f) * scale_y - 0.5f;
+#else /* SAMPLING_POLICY */
+#error("Unsupported sampling policy");
+#endif /* SAMPLING_POLICY */
+
+ const float new_xf = floor(new_x);
+ const float new_yf = floor(new_y);
+ float clamped_x = clamp(new_xf, 0.0f, input_width - 1);
+ float clamped_x1 = clamp(new_xf + 1, 0.0f, input_width - 1);
+ float clamped_x_ = clamped_x;
+ float clamped_x1_ = clamped_x1;
+ const float clamped_y = clamp(new_yf, 0.0f, input_height - 1);
+ const float clamped_y1 = clamp(new_yf + 1, 0.0f, input_height - 1);
+
+#ifndef BORDER_MODE_REPLICATE
+ clamped_x1 = select(clamped_x1, 0.0f - BORDER_SIZE, new_yf + 1 < 0.f || new_yf + 1 > input_height - 1 || new_xf + 1 < 0.f || new_xf + 1 > input_width - 1);
+ clamped_x_ = select(clamped_x_, 0.0f - BORDER_SIZE, new_yf + 1 > input_height - 1 || new_xf < 0.f || new_xf > input_width - 1);
+ clamped_x = select(clamped_x, 0.0f - BORDER_SIZE, new_yf < 0.f || new_yf > input_height - 1 || new_xf < 0.f || new_xf > input_width - 1);
+ clamped_x1_ = select(clamped_x1_, 0.0f - BORDER_SIZE, new_xf + 1 < 0.f || new_xf + 1 > input_width - 1 || new_yf < 0.f || new_yf > input_height - 1);
+#endif /* BORDER_MODE_REPLICATE */
+
+ int4 ins = (int4)(*((__global DATA_TYPE *)tensor3D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y))),
+ *((__global DATA_TYPE *)tensor3D_offset(&in, get_global_id(0), convert_int(clamped_x1_), convert_int(clamped_y))),
+ *((__global DATA_TYPE *)tensor3D_offset(&in, get_global_id(0), convert_int(clamped_x_), convert_int(clamped_y1))),
+ *((__global DATA_TYPE *)tensor3D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1))));
+
+ const float a = new_x - new_xf;
+ const float b = 1.f - a;
+ const float a1 = new_y - new_yf;
+ const float b1 = 1.f - a1;
+ const float4 insf32 = convert_float4(ins - (int4)OFFSET) * (float4)SCALE;
+
+ const float fr = ((insf32.s0 * b * b1) + (insf32.s1 * a * b1) + (insf32.s2 * b * a1) + (insf32.s3 * a * a1));
+
+ uchar res = convert_uchar_sat(convert_int_sat_rtp(fr / SCALE) + OFFSET);
+
+ *((__global DATA_TYPE *)out.ptr) = res;
+}
diff --git a/src/core/CL/cl_kernels/warp_helpers_quantized.h b/src/core/CL/cl_kernels/warp_helpers_quantized.h
new file mode 100644
index 0000000000..48d6faef73
--- /dev/null
+++ b/src/core/CL/cl_kernels/warp_helpers_quantized.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers_asymm.h"
+
+/** Clamps the given coordinates to the borders according to the border size.
+ *
+ * @param[in] coords Vector of 2D coordinates to clamp. Even positions are X coords, odd positions are Y coords.
+ * @param[in] width Width of the image
+ * @param[in] height Height of the image
+ * @param[in] border_size Border size of the image
+ *
+ */
+inline const float8 clamp_to_border_with_size_quantized(float8 coords, const float width, const float height, const float border_size)
+{
+ const float4 clamped_x = clamp(coords.even, 0.0f - border_size, width - 1 + border_size);
+ const float4 clamped_y = clamp(coords.odd, 0.0f - border_size, height - 1 + border_size);
+ return (float8)(clamped_x.s0, clamped_y.s0, clamped_x.s1, clamped_y.s1, clamped_x.s2, clamped_y.s2, clamped_x.s3, clamped_y.s3);
+}
+
+/* FIXME(COMPMID-682): Clamp border properly in UNDEFINED border mode in Warp, Scale, Remap */
+/** Clamps the given coordinates to the borders.
+ *
+ * @param[in] coords Vector of 2D coordinates to clamp. Even positions are X coords, odd positions are Y coords.
+ * @param[in] width Width of the image
+ * @param[in] height Height of the image
+ *
+ */
+inline const float8 clamp_to_border_quantized(float8 coords, const float width, const float height)
+{
+ return clamp_to_border_with_size_quantized(coords, width, height, 1);
+}
+
+/** Given a texel coordinates this function will return the following array of coordinates:
+ * [ P, right neighbour, below neighbour, below right neighbour ]
+ *
+ * @note No checks to see if the coordinates are out of the image are done here.
+ *
+ * @param[in] coord Input coordinates
+ *
+ * @return vector of 8 floats with the coordinates, even positions are x and odd y.
+ */
+inline const float8 get_neighbour_coords_quantized(const float2 coord)
+{
+ return (float8)(/*tl*/ coord.s0, coord.s1, /*tr*/ coord.s0 + 1, coord.s1, /*bl*/ coord.s0, coord.s1 + 1, /*br*/ coord.s0 + 1, coord.s1 + 1);
+}
+
+/** Returns the current thread coordinates. */
+inline const float2 get_current_coords_quantized()
+{
+ return (float2)(get_global_id(0) * 4, get_global_id(1));
+}
+
+/** Computes the bilinear interpolation for each set of coordinates in the vector coords and returns the values
+ *
+ * @param[in] in Pointer to the source image.
+ * @param[in] coords Vector of four 2D coordinates. Even pos is x and odd y.
+ * @param[in] width Width of the image
+ * @param[in] height Height of the image
+ * @param[in] border_size Border size
+ * @param[in] scale Scale value
+ * @param[in] offset_qasymm Offset value
+ */
+inline const VEC_DATA_TYPE(DATA_TYPE, 4) bilinear_interpolate_with_border_quantized(const Image *in, const float8 coords, const float width, const float height, const float border_size,
+ const float scale, const int offset_qasymm)
+{
+ // If any of the 4 texels is out of the image's boundaries we use the border value (REPLICATE or CONSTANT) for any texel out of the image.
+
+ // Sets the 4x4 coordinates for each of the four input texels
+ const float8 fc = floor(coords);
+ const float16 c1 = (float16)(
+ clamp_to_border_with_size_quantized(get_neighbour_coords_quantized((float2)(fc.s0, fc.s1)), width, height, border_size),
+ clamp_to_border_with_size_quantized(get_neighbour_coords_quantized((float2)(fc.s2, fc.s3)), width, height, border_size));
+ const float16 c2 = (float16)(
+ clamp_to_border_with_size_quantized(get_neighbour_coords_quantized((float2)(fc.s4, fc.s5)), width, height, border_size),
+ clamp_to_border_with_size_quantized(get_neighbour_coords_quantized((float2)(fc.s6, fc.s7)), width, height, border_size));
+
+ // Loads the values from the input image
+ const int16 t = (int16)(
+ /* tl, tr, bl, br */
+ * ((__global DATA_TYPE *)offset(in, c1.s0, c1.s1)), *((__global DATA_TYPE *)offset(in, c1.s2, c1.s3)),
+ *((__global DATA_TYPE *)offset(in, c1.s4, c1.s5)), *((__global DATA_TYPE *)offset(in, c1.s6, c1.s7)),
+ *((__global DATA_TYPE *)offset(in, c1.s8, c1.s9)), *((__global DATA_TYPE *)offset(in, c1.sa, c1.sb)),
+ *((__global DATA_TYPE *)offset(in, c1.sc, c1.sd)), *((__global DATA_TYPE *)offset(in, c1.se, c1.sf)),
+ *((__global DATA_TYPE *)offset(in, c2.s0, c2.s1)), *((__global DATA_TYPE *)offset(in, c2.s2, c2.s3)),
+ *((__global DATA_TYPE *)offset(in, c2.s4, c2.s5)), *((__global DATA_TYPE *)offset(in, c2.s6, c2.s7)),
+ *((__global DATA_TYPE *)offset(in, c2.s8, c2.s9)), *((__global DATA_TYPE *)offset(in, c2.sa, c2.sb)),
+ *((__global DATA_TYPE *)offset(in, c2.sc, c2.sd)), *((__global DATA_TYPE *)offset(in, c2.se, c2.sf)));
+
+ const float16 inf32 = convert_float16(t - (int16)offset_qasymm) * (float16)scale;
+
+ const float8 a = coords - fc;
+ const float8 b = ((float8)(1.f)) - a;
+ const float4 fr = (float4)(
+ ((inf32.s0 * b.s0 * b.s1) + (inf32.s1 * a.s0 * b.s1) + (inf32.s2 * b.s0 * a.s1) + (inf32.s3 * a.s0 * a.s1)),
+ ((inf32.s4 * b.s2 * b.s3) + (inf32.s5 * a.s2 * b.s3) + (inf32.s6 * b.s2 * a.s3) + (inf32.s7 * a.s2 * a.s3)),
+ ((inf32.s8 * b.s4 * b.s5) + (inf32.s9 * a.s4 * b.s5) + (inf32.sa * b.s4 * a.s5) + (inf32.sb * a.s4 * a.s5)),
+ ((inf32.sc * b.s6 * b.s7) + (inf32.sd * a.s6 * b.s7) + (inf32.se * b.s6 * a.s7) + (inf32.sf * a.s6 * a.s7)));
+
+ const uchar4 res = convert_uchar4_sat(convert_int4_sat_rtp(fr / scale) + offset_qasymm);
+
+ return res;
+}
+
+/* FIXME(COMPMID-682): Clamp border properly in UNDEFINED border mode in Warp, Scale, Remap */
+/** Computes the bilinear interpolation for each set of coordinates in the vector coords and returns the values
+ *
+ * @param[in] in Pointer to the source image.
+ * @param[in] coords Vector of four 2D coordinates. Even pos is x and odd y.
+ * @param[in] width Width of the image
+ * @param[in] height Height of the image
+ * @param[in] scale Scale value
+ * @param[in] offset_qasymm Offset value
+ */
+inline const VEC_DATA_TYPE(DATA_TYPE, 4) bilinear_interpolate_quantized(const Image *in, const float8 coords, const float width, const float height, const float scale, const int offset_qasymm)
+{
+ return bilinear_interpolate_with_border_quantized(in, coords, width, height, 1, scale, offset_qasymm);
+}
diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp
index d56d6f7da8..ce6c016154 100644
--- a/src/core/CL/kernels/CLScaleKernel.cpp
+++ b/src/core/CL/kernels/CLScaleKernel.cpp
@@ -62,7 +62,7 @@ inline std::pair<float, float> calculate_scale_factors(const ITensorInfo &input,
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(output == input);
@@ -170,6 +170,8 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, Interpo
float hr = 0.f;
std::tie(wr, hr) = calculate_scale_factors(*input->info(), *output->info());
+ const bool call_quantized_kernel = is_data_type_quantized_asymmetric(input->info()->data_type()) && policy == InterpolationPolicy::BILINEAR;
+
DataLayout data_layout = input->info()->data_layout();
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
@@ -200,11 +202,18 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, Interpo
build_opts.add_option("-DBORDER_SIZE=" + support::cpp11::to_string(border.right));
build_opts.add_option_if(border_mode == BorderMode::REPLICATE, "-DBORDER_MODE_REPLICATE");
build_opts.add_option_if_else(sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT");
+ if(call_quantized_kernel)
+ {
+ build_opts.add_option("-DSCALE=" + support::cpp11::to_string(input->info()->quantization_info().scale));
+ build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset));
+ }
std::string interpolation_name = string_from_interpolation_policy(policy);
std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower);
- std::string kernel_name = "scale_" + interpolation_name + "_" + lower_string(string_from_data_layout(data_layout));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ std::string kernel_name = "scale_" + interpolation_name;
+ kernel_name += call_quantized_kernel ? "_quantized_" : "_";
+ kernel_name += lower_string(string_from_data_layout(data_layout));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
unsigned int idx = data_layout == DataLayout::NHWC ? 2 * num_arguments_per_3D_tensor() : 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
diff --git a/tests/validation/CL/Scale.cpp b/tests/validation/CL/Scale.cpp
index 92d4f96c6a..3bf7c90a1e 100644
--- a/tests/validation/CL/Scale.cpp
+++ b/tests/validation/CL/Scale.cpp
@@ -78,7 +78,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
TensorInfo(TensorShape(36U, 36U, 2U, 4U), 1, DataType::U8),
TensorInfo(TensorShape(40U, 35U, 2U, 4U), 1, DataType::S16),
TensorInfo(TensorShape(37U, 37U, 2U), 1, DataType::F32), // window shrink
- TensorInfo(TensorShape(128U, 64U, 1U, 3U), 1, DataType::QASYMM8), // not supported
TensorInfo(TensorShape(37U, 37U, 3U, 4U), 1, DataType::F32), // mismatching datatype
TensorInfo(TensorShape(28U, 33U, 2U), 1, DataType::F32), // policy area, scale factor not correct
}),
@@ -87,7 +86,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
TensorInfo(TensorShape(40U, 76U, 2U, 4U), 1, DataType::U8),
TensorInfo(TensorShape(28U, 32U, 2U, 4U), 1, DataType::S16),
TensorInfo(TensorShape(39U, 55U, 2U), 1, DataType::F32), // window shrink
- TensorInfo(TensorShape(137U, 134U, 1U, 3U), 1, DataType::QASYMM8), // not supported
TensorInfo(TensorShape(39U, 77U, 3U, 4U), 1, DataType::F16), // mismatching datatype
TensorInfo(TensorShape(26U, 21U, 2U), 1, DataType::F32), // policy area, scale factor not correct
})),
@@ -97,7 +95,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
InterpolationPolicy::NEAREST_NEIGHBOR,
InterpolationPolicy::NEAREST_NEIGHBOR,
InterpolationPolicy::BILINEAR,
- InterpolationPolicy::BILINEAR,
InterpolationPolicy::AREA,
})),
framework::dataset::make("BorderMode",{ BorderMode::UNDEFINED,
@@ -107,9 +104,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
BorderMode::UNDEFINED,
BorderMode::UNDEFINED,
BorderMode::UNDEFINED,
- BorderMode::UNDEFINED,
})),
- framework::dataset::make("Expected", { true, true, true, true, false, false, false, false })),
+ framework::dataset::make("Expected", { true, true, true, true, false, false, false })),
input_info, output_info, policy, border_mode, expected)
{
Status status = CLScale::validate(&input_info.clone()->set_is_resizable(false),
@@ -197,7 +193,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture<float>, framework::DatasetMode::
// Validate output
validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute);
}
-TEST_SUITE_END()
+TEST_SUITE_END() // FP32
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
DataType::F16)),
@@ -227,8 +223,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture<half>, framework::DatasetMode::N
// Validate output
validate(CLAccessor(_target), _reference, valid_region, tolerance_f16);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
TEST_SUITE(Integer)
TEST_SUITE(U8)
@@ -260,7 +256,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture<uint8_t>, framework::DatasetMode
// Validate output
validate(CLAccessor(_target), _reference, valid_region, tolerance_u8);
}
-TEST_SUITE_END()
+TEST_SUITE_END() // U8
TEST_SUITE(S16)
FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
DataType::S16)),
@@ -290,11 +286,50 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture<int16_t>, framework::DatasetMode
// Validate output
validate(CLAccessor(_target), _reference, valid_region, tolerance_s16);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // S16
+TEST_SUITE_END() // Integer
+
+template <typename T>
+using CLScaleQuantizedFixture = ScaleValidationQuantizedFixture<CLTensor, CLAccessor, CLScale, T>;
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType",
+ DataType::QASYMM8)),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -1) })),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+ datasets::BorderModes()),
+ datasets::SamplingPolicies()))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_u8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(datasets::LargeShapes(),
+ framework::dataset::make("DataType",
+ DataType::QASYMM8)),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -1) })),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+ datasets::BorderModes()),
+ datasets::SamplingPolicies()))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_u8);
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // Scale
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/fixtures/ScaleFixture.h b/tests/validation/fixtures/ScaleFixture.h
index 5413147699..2be02ec4d6 100644
--- a/tests/validation/fixtures/ScaleFixture.h
+++ b/tests/validation/fixtures/ScaleFixture.h
@@ -41,20 +41,21 @@ namespace test
namespace validation
{
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ScaleValidationFixture : public framework::Fixture
+class ScaleValidationGenericFixture : public framework::Fixture
{
public:
template <typename...>
- void setup(TensorShape shape, DataType data_type, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy)
+ void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy)
{
constexpr float max_width = 8192.0f;
constexpr float max_height = 6384.0f;
- _shape = shape;
- _policy = policy;
- _border_mode = border_mode;
- _sampling_policy = sampling_policy;
- _data_type = data_type;
+ _shape = shape;
+ _policy = policy;
+ _border_mode = border_mode;
+ _sampling_policy = sampling_policy;
+ _data_type = data_type;
+ _quantization_info = quantization_info;
std::mt19937 generator(library->seed());
std::uniform_real_distribution<float> distribution_float(0.25, 3);
@@ -70,8 +71,8 @@ public:
std::uniform_int_distribution<uint8_t> distribution_u8(0, 255);
T constant_border_value = static_cast<T>(distribution_u8(generator));
- _target = compute_target(shape, data_layout, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy);
- _reference = compute_reference(shape, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy);
+ _target = compute_target(shape, data_layout, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy, quantization_info);
+ _reference = compute_reference(shape, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy, quantization_info);
}
protected:
@@ -82,6 +83,11 @@ protected:
{
library->fill_tensor_uniform(tensor, 0);
}
+ else if(is_data_type_quantized(tensor.data_type()))
+ {
+ std::uniform_int_distribution<> distribution(0, 100);
+ library->fill(tensor, distribution, 0);
+ }
else
{
// Restrict range for float to avoid any floating point issues
@@ -91,7 +97,8 @@ protected:
}
TensorType compute_target(TensorShape shape, DataLayout data_layout, const float scale_x, const float scale_y,
- InterpolationPolicy policy, BorderMode border_mode, T constant_border_value, SamplingPolicy sampling_policy)
+ InterpolationPolicy policy, BorderMode border_mode, T constant_border_value, SamplingPolicy sampling_policy,
+ QuantizationInfo quantization_info)
{
// Change shape in case of NHWC.
if(data_layout == DataLayout::NHWC)
@@ -100,7 +107,7 @@ protected:
}
// Create tensors
- TensorType src = create_tensor<TensorType>(shape, _data_type, 1, QuantizationInfo(), data_layout);
+ TensorType src = create_tensor<TensorType>(shape, _data_type, 1, quantization_info, data_layout);
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
@@ -108,7 +115,7 @@ protected:
TensorShape shape_scaled(shape);
shape_scaled.set(idx_width, shape[idx_width] * scale_x);
shape_scaled.set(idx_height, shape[idx_height] * scale_y);
- TensorType dst = create_tensor<TensorType>(shape_scaled, _data_type, 1, QuantizationInfo(), data_layout);
+ TensorType dst = create_tensor<TensorType>(shape_scaled, _data_type, 1, quantization_info, data_layout);
// Create and configure function
FunctionType scale;
@@ -134,10 +141,11 @@ protected:
}
SimpleTensor<T> compute_reference(const TensorShape &shape, const float scale_x, const float scale_y,
- InterpolationPolicy policy, BorderMode border_mode, T constant_border_value, SamplingPolicy sampling_policy)
+ InterpolationPolicy policy, BorderMode border_mode, T constant_border_value, SamplingPolicy sampling_policy,
+ QuantizationInfo quantization_info)
{
// Create reference
- SimpleTensor<T> src{ shape, _data_type, 1, QuantizationInfo() };
+ SimpleTensor<T> src{ shape, _data_type, 1, quantization_info };
// Fill reference
fill(src);
@@ -152,6 +160,40 @@ protected:
BorderMode _border_mode{};
SamplingPolicy _sampling_policy{};
DataType _data_type{};
+ QuantizationInfo _quantization_info{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ScaleValidationQuantizedFixture : public ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ template <typename...>
+ void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy)
+ {
+ ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape,
+ data_type,
+ quantization_info,
+ data_layout,
+ policy,
+ border_mode,
+ sampling_policy);
+ }
+};
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ScaleValidationFixture : public ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ template <typename...>
+ void setup(TensorShape shape, DataType data_type, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy)
+ {
+ ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape,
+ data_type,
+ QuantizationInfo(),
+ data_layout,
+ policy,
+ border_mode,
+ sampling_policy);
+ }
};
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp
index f8a8b88cf9..2f7bf2deb3 100644
--- a/tests/validation/reference/Scale.cpp
+++ b/tests/validation/reference/Scale.cpp
@@ -37,8 +37,8 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale)
+SimpleTensor<T> scale_core(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value,
+ SamplingPolicy sampling_policy, bool ceil_policy_scale)
{
// Add 1 if ceil_policy_scale is true
const size_t round_value = ceil_policy_scale ? 1U : 0U;
@@ -168,8 +168,32 @@ SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, I
return out;
}
-template SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale);
+template <typename T>
+SimpleTensor<T> scale(const SimpleTensor<T> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value,
+ SamplingPolicy sampling_policy, bool ceil_policy_scale)
+{
+ return scale_core<T>(src, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy, ceil_policy_scale);
+}
+
+template <>
+SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value,
+ SamplingPolicy sampling_policy, bool ceil_policy_scale)
+{
+ SimpleTensor<uint8_t> dst;
+ if(src.quantization_info().scale != 0.f)
+ {
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ float constant_border_value_f = scvt_f32_qasymm8(constant_border_value, src.quantization_info().scale, src.quantization_info().offset);
+ SimpleTensor<float> dst_tmp = scale_core<float>(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale);
+ dst = convert_to_asymmetric(dst_tmp, src.quantization_info());
+ }
+ else
+ {
+ dst = scale_core<uint8_t>(src, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy, ceil_policy_scale);
+ }
+ return dst;
+}
+
template SimpleTensor<int16_t> scale(const SimpleTensor<int16_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int16_t constant_border_value,
SamplingPolicy sampling_policy, bool ceil_policy_scale);
template SimpleTensor<half> scale(const SimpleTensor<half> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, half constant_border_value,
diff --git a/tests/validation/reference/Scale.h b/tests/validation/reference/Scale.h
index 566e30af10..66267ebc9a 100644
--- a/tests/validation/reference/Scale.h
+++ b/tests/validation/reference/Scale.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#define __ARM_COMPUTE_TEST_SCALE_H__
#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
namespace arm_compute
{
@@ -35,7 +36,7 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0,
+SimpleTensor<T> scale(const SimpleTensor<T> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0,
SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool ceil_policy_scale = false);
} // namespace reference
} // namespace validation