aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2020-12-03 16:26:35 +0000
committerManuel Bottini <manuel.bottini@arm.com>2020-12-18 13:36:07 +0000
commit0c58f99495844c6ace629116451dae00e8c27418 (patch)
tree709fe9b3088626a135aec253ca20ede21d2c870f
parent2567adfc881049fefada9be523347b4f384e6d27 (diff)
downloadComputeLibrary-0c58f99495844c6ace629116451dae00e8c27418.tar.gz
Remove OpenCL padding CLScaleKernel
Resolves COMPMID-3918 Change-Id: I970b1eaf2ae6f2f5a8cfc318cd1a3dfd3ba36fdb Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4668 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
-rw-r--r--arm_compute/runtime/CL/functions/CLScale.h76
-rw-r--r--src/core/CL/cl_kernels/scale.cl51
-rw-r--r--src/core/CL/cl_kernels/scale_quantized.cl49
-rw-r--r--src/core/CL/kernels/CLScaleKernel.cpp22
-rw-r--r--src/runtime/CL/functions/CLScale.cpp37
5 files changed, 112 insertions, 123 deletions
diff --git a/arm_compute/runtime/CL/functions/CLScale.h b/arm_compute/runtime/CL/functions/CLScale.h
index 360d63ea22..1739190972 100644
--- a/arm_compute/runtime/CL/functions/CLScale.h
+++ b/arm_compute/runtime/CL/functions/CLScale.h
@@ -26,20 +26,37 @@
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/runtime/CL/CLRuntimeContext.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLScaleKernel.h"
#include <cstdint>
namespace arm_compute
{
+// Forward declarations
class CLCompileContext;
class ICLTensor;
class ITensorInfo;
/** Basic function to run @ref CLScaleKernel */
-class CLScale : public ICLSimpleFunction
+class CLScale : public IFunction
{
public:
+ /** Default Constructor */
+ CLScale();
+ /** Default Destructor */
+ ~CLScale() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLScale(const CLScale &) = delete;
+ /** Default move constructor */
+ CLScale(CLScale &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLScale &operator=(const CLScale &) = delete;
+ /** Default move assignment operator */
+ CLScale &operator=(CLScale &&) = default;
+
/** Initialize the function's source, destination, interpolation type and border_mode.
*
* @param[in,out] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
@@ -57,37 +74,6 @@ public:
* @param[in] info @ref ScaleKernelInfo descriptor to be used to configure
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info);
- /** Initialize the function's source, destination, interpolation type and border_mode.
- *
- * @param[in,out] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data types supported: Same as @p input
- * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
- * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true.
- * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
- */
- ARM_COMPUTE_DEPRECATED_REL(20.08)
- void configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(),
- SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false);
- /** Initialize the function's source, destination, interpolation type and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data types supported: Same as @p input
- * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
- * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true.
- * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
- */
- ARM_COMPUTE_DEPRECATED_REL(20.08)
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(),
- SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false);
/** Static function to check if given info will lead to a valid configuration of @ref CLScale
*
@@ -99,23 +85,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLScale
- *
- * @param[in] input Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32.
- * @param[in] output Output tensor info. Data type supported: Same as @p input
- * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
- * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true.
- * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
- *
- * @return a status
- */
- ARM_COMPUTE_DEPRECATED_REL(20.08)
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(),
- SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false);
+
+ // Inherited methods overridden:
+ void run() override;
+
+protected:
+ std::unique_ptr<CLFillBorderKernel> _border_handler;
+ std::unique_ptr<CLScaleKernel> _kernel;
};
}
#endif /*ARM_COMPUTE_CLSCALE_H */
diff --git a/src/core/CL/cl_kernels/scale.cl b/src/core/CL/cl_kernels/scale.cl
index a01ff89a4f..d4c27e6cf6 100644
--- a/src/core/CL/cl_kernels/scale.cl
+++ b/src/core/CL/cl_kernels/scale.cl
@@ -189,8 +189,8 @@ __kernel void scale_nearest_neighbour_nhwc(
float new_x = get_global_id(1) * scale_x;
float new_y = (get_global_id(2) % DEPTH_OUT) * scale_y;
#elif SAMPLING_POLICY_CENTER
- float new_x = (get_global_id(1) + 0.5f) * scale_x;
- float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y;
+ float new_x = (get_global_id(1) + 0.5f) * scale_x;
+ float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y;
#else /* SAMPLING_POLICY */
#error("Unsupported sampling policy");
#endif /* SAMPLING_POLICY */
@@ -209,6 +209,7 @@ __kernel void scale_nearest_neighbour_nhwc(
* @note Sampling policy to be used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT
* @note If border mode replicate is used, is should be passed as -DBORDER_MODE_REPLICATE
* @note Output tensor's depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH=16
+ * @note The value to be used at the edges of the images shoud be given as a preprocessor argument using -DCONSTANT_VALUE=value.
*
* @param[in] in_ptr Pointer to the source image. Supported data types: U8/S16/F16/F32.
* @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
@@ -230,6 +231,7 @@ __kernel void scale_nearest_neighbour_nhwc(
* @param[in] input_height Input image height
* @param[in] scale_x The scale factor along x dimension
* @param[in] scale_y The scale factor along y dimension
+ *
*/
__kernel void scale_bilinear_nhwc(
TENSOR4D_DECLARATION(in),
@@ -252,27 +254,38 @@ __kernel void scale_bilinear_nhwc(
#error("Unsupported sampling policy");
#endif /* SAMPLING_POLICY */
- const float new_xf = floor(new_x);
- const float new_yf = floor(new_y);
- float clamped_x = clamp(new_xf, 0.0f, input_width - 1);
- float clamped_x1 = clamp(new_xf + 1, 0.0f, input_width - 1);
- float clamped_x_ = clamped_x;
- float clamped_x1_ = clamped_x1;
- const float clamped_y = clamp(new_yf, 0.0f, input_height - 1);
- const float clamped_y1 = clamp(new_yf + 1, 0.0f, input_height - 1);
+ const float new_xf = floor(new_x);
+ const float new_yf = floor(new_y);
+ const float clamped_x = clamp(new_xf, 0.0f, input_width - 1);
+ const float clamped_x1 = clamp(new_xf + 1, 0.0f, input_width - 1);
+ const float clamped_y = clamp(new_yf, 0.0f, input_height - 1);
+ const float clamped_y1 = clamp(new_yf + 1, 0.0f, input_height - 1);
#ifndef BORDER_MODE_REPLICATE
- clamped_x1 = select(clamped_x1, 0.0f - BORDER_SIZE, new_yf + 1 < 0.f || new_yf + 1 > input_height - 1 || new_xf + 1 < 0.f || new_xf + 1 > input_width - 1);
- clamped_x_ = select(clamped_x_, 0.0f - BORDER_SIZE, new_yf + 1 > input_height - 1 || new_xf < 0.f || new_xf > input_width - 1);
- clamped_x = select(clamped_x, 0.0f - BORDER_SIZE, new_yf < 0.f || new_yf > input_height - 1 || new_xf < 0.f || new_xf > input_width - 1);
- clamped_x1_ = select(clamped_x1_, 0.0f - BORDER_SIZE, new_xf + 1 < 0.f || new_xf + 1 > input_width - 1 || new_yf < 0.f || new_yf > input_height - 1);
+ const bool check_x = (0.f <= new_xf && new_xf < input_width);
+ const bool check_x1 = (-1.f <= new_xf && new_xf < input_width - 1);
+ const bool check_y = (0.f <= new_yf && new_yf < input_height);
+ const bool check_y1 = (-1.f <= new_yf && new_yf < input_height - 1);
+ const float ins_0 = select((float)(CONSTANT_VALUE), (float)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y),
+ (get_global_id(2) / DEPTH_OUT)))),
+ check_x && check_y);
+ const float ins_1 = select((float)(CONSTANT_VALUE), (float)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y),
+ (get_global_id(2) / DEPTH_OUT)))),
+ check_x1 && check_y);
+ const float ins_2 = select((float)(CONSTANT_VALUE), (float)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y1),
+ (get_global_id(2) / DEPTH_OUT)))),
+ check_x && check_y1);
+ const float ins_3 = select((float)(CONSTANT_VALUE), (float)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1),
+ (get_global_id(2) / DEPTH_OUT)))),
+ check_x1 && check_y1);
+ float4 ins = (float4)(ins_0, ins_1, ins_2, ins_3);
+#else /* BORDER_MODE_REPLICATE */
+ float4 ins = (float4)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
+ *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
+ *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))),
+ *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))));
#endif /* BORDER_MODE_REPLICATE */
- float4 ins = (float4)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
- *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1_), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
- *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x_), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))),
- *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))));
-
const float a = new_x - new_xf;
const float b = 1.f - a;
const float a1 = new_y - new_yf;
diff --git a/src/core/CL/cl_kernels/scale_quantized.cl b/src/core/CL/cl_kernels/scale_quantized.cl
index 2aa7f185c6..010e4ed57a 100644
--- a/src/core/CL/cl_kernels/scale_quantized.cl
+++ b/src/core/CL/cl_kernels/scale_quantized.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -93,6 +93,7 @@ __kernel void scale_bilinear_quantized_nchw(
* @note Offset value for QASYMM8 data type to used is passed as -DOFFSET=<VALUE> e.g. -DOFFSET=1
* @note If border mode replicate is used, is should be passed as -DBORDER_MODE_REPLICATE
* @note Output tensor's depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH=16
+ * @note The value to be used at the edges of the images shoud be given as a preprocessor argument using -DCONSTANT_VALUE=value.
*
* @param[in] in_ptr Pointer to the source image. Supported data types: QASYMM8.
* @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
@@ -114,6 +115,7 @@ __kernel void scale_bilinear_quantized_nchw(
* @param[in] input_height Input image height
* @param[in] scale_x The scale factor along x dimension
* @param[in] scale_y The scale factor along y dimension
+ * @param[in] constant_border_value Constant border value to use
*/
__kernel void scale_bilinear_quantized_nhwc(
TENSOR4D_DECLARATION(in),
@@ -136,27 +138,38 @@ __kernel void scale_bilinear_quantized_nhwc(
#error("Unsupported sampling policy");
#endif /* SAMPLING_POLICY */
- const float new_xf = floor(new_x);
- const float new_yf = floor(new_y);
- float clamped_x = clamp(new_xf, 0.0f, input_width - 1);
- float clamped_x1 = clamp(new_xf + 1, 0.0f, input_width - 1);
- float clamped_x_ = clamped_x;
- float clamped_x1_ = clamped_x1;
- const float clamped_y = clamp(new_yf, 0.0f, input_height - 1);
- const float clamped_y1 = clamp(new_yf + 1, 0.0f, input_height - 1);
+ const float new_xf = floor(new_x);
+ const float new_yf = floor(new_y);
+ const float clamped_x = clamp(new_xf, 0.0f, input_width - 1);
+ const float clamped_x1 = clamp(new_xf + 1, 0.0f, input_width - 1);
+ const float clamped_y = clamp(new_yf, 0.0f, input_height - 1);
+ const float clamped_y1 = clamp(new_yf + 1, 0.0f, input_height - 1);
#ifndef BORDER_MODE_REPLICATE
- clamped_x1 = select(clamped_x1, 0.0f - BORDER_SIZE, new_yf + 1 < 0.f || new_yf + 1 > input_height - 1 || new_xf + 1 < 0.f || new_xf + 1 > input_width - 1);
- clamped_x_ = select(clamped_x_, 0.0f - BORDER_SIZE, new_yf + 1 > input_height - 1 || new_xf < 0.f || new_xf > input_width - 1);
- clamped_x = select(clamped_x, 0.0f - BORDER_SIZE, new_yf < 0.f || new_yf > input_height - 1 || new_xf < 0.f || new_xf > input_width - 1);
- clamped_x1_ = select(clamped_x1_, 0.0f - BORDER_SIZE, new_xf + 1 < 0.f || new_xf + 1 > input_width - 1 || new_yf < 0.f || new_yf > input_height - 1);
+ const bool check_x = (0.f <= new_xf && new_xf < input_width);
+ const bool check_x1 = (-1.f <= new_xf && new_xf < input_width - 1);
+ const bool check_y = (0.f <= new_yf && new_yf < input_height);
+ const bool check_y1 = (-1.f <= new_yf && new_yf < input_height - 1);
+ const int ins_0 = select((int)(CONSTANT_VALUE), (int)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y),
+ (get_global_id(2) / DEPTH_OUT)))),
+ check_x && check_y);
+ const int ins_1 = select((int)(CONSTANT_VALUE), (int)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y),
+ (get_global_id(2) / DEPTH_OUT)))),
+ check_x1 && check_y);
+ const int ins_2 = select((int)(CONSTANT_VALUE), (int)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y1),
+ (get_global_id(2) / DEPTH_OUT)))),
+ check_x && check_y1);
+ const int ins_3 = select((int)(CONSTANT_VALUE), (int)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1),
+ (get_global_id(2) / DEPTH_OUT)))),
+ check_x1 && check_y1);
+ int4 ins = (int4)(ins_0, ins_1, ins_2, ins_3);
+#else /* BORDER_MODE_REPLICATE */
+ int4 ins = (int4)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
+ *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
+ *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))),
+ *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))));
#endif /* BORDER_MODE_REPLICATE */
- int4 ins = (int4)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
- *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1_), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
- *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x_), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))),
- *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))));
-
const float a = new_x - new_xf;
const float b = 1.f - a;
const float a1 = new_y - new_yf;
diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp
index 5a7d5830fd..f3d2fa12d5 100644
--- a/src/core/CL/kernels/CLScaleKernel.cpp
+++ b/src/core/CL/kernels/CLScaleKernel.cpp
@@ -120,15 +120,8 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
break;
case DataLayout::NHWC:
{
- num_elems_processed_per_iteration = 1;
// Configure kernel window
- win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- AccessWindowStatic input_access(input, -border.left, -border.top,
- input->dimension(0) + border.right,
- input->dimension(1) + border.bottom);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- window_changed = update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+ win = calculate_max_window(*output, Steps());
}
break;
default:
@@ -142,14 +135,13 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
BorderSize CLScaleKernel::border_size() const
{
- return BorderSize(1);
+ return BorderSize(static_cast<size_t>(_data_layout == DataLayout::NCHW));
}
Status CLScaleKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info)
{
- BorderSize border = BorderSize(1);
-
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, info));
+ BorderSize border = BorderSize(static_cast<size_t>(input->data_layout() == DataLayout::NCHW));
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), info, border).first);
return Status{};
@@ -173,6 +165,7 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, const S
void CLScaleKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info)
{
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), info));
+ auto padding_info = get_padding_info({ input, output });
_input = input;
_output = output;
@@ -208,6 +201,7 @@ void CLScaleKernel::configure(const CLCompileContext &compile_context, const ICL
// Create kernel
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DCONSTANT_VALUE=" + string_from_pixel_value(info.constant_border_value, input->info()->data_type()));
build_opts.add_option("-DBORDER_SIZE=" + support::cpp11::to_string(border.right));
build_opts.add_option_if(info.border_mode == BorderMode::REPLICATE, "-DBORDER_MODE_REPLICATE");
build_opts.add_option_if(is_nhwc, "-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
@@ -219,7 +213,6 @@ void CLScaleKernel::configure(const CLCompileContext &compile_context, const ICL
build_opts.add_option("-DSCALE=" + support::cpp11::to_string(qinfo.scale));
build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset));
}
-
std::string interpolation_name = string_from_interpolation_policy(interpolation_policy_to_use);
std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower);
std::string kernel_name = "scale_" + interpolation_name;
@@ -250,13 +243,16 @@ void CLScaleKernel::configure(const CLCompileContext &compile_context, const ICL
_config_id += support::cpp11::to_string(output->info()->dimension(2));
_config_id += "_";
_config_id += support::cpp11::to_string(output->info()->dimension(3));
+ if(is_nhwc)
+ {
+ ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
+ }
}
void CLScaleKernel::run(const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
switch(_data_layout)
{
case DataLayout::NCHW:
diff --git a/src/runtime/CL/functions/CLScale.cpp b/src/runtime/CL/functions/CLScale.cpp
index 6658957e07..aab5d9ba73 100644
--- a/src/runtime/CL/functions/CLScale.cpp
+++ b/src/runtime/CL/functions/CLScale.cpp
@@ -27,20 +27,17 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLScaleKernel.h"
namespace arm_compute
{
-void CLScale::configure(ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info)
+CLScale::CLScale()
+ : _border_handler(std::make_unique<CLFillBorderKernel>()), _kernel()
{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, info);
}
-void CLScale::configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding,
- bool align_corners)
+void CLScale::configure(ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info)
{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, ScaleKernelInfo{ policy, border_mode, constant_border_value, sampling_policy, use_padding, align_corners });
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, info);
}
void CLScale::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info)
@@ -53,30 +50,24 @@ void CLScale::configure(const CLCompileContext &compile_context, ICLTensor *inpu
// Tune kernels
CLScheduler::get().tune_kernel_static(*_kernel);
- auto border_mode_to_use = info.border_mode;
- // In the case of NHWC we can't have undefined border mode as this would require to access elements outside z dimension,
- // so we treat it like border constant.
- if(info.border_mode == BorderMode::UNDEFINED && input->info()->data_layout() == DataLayout::NHWC)
+ if(input->info()->data_layout() == DataLayout::NCHW && !_kernel->border_size().empty())
{
- border_mode_to_use = BorderMode::CONSTANT;
+ _border_handler->configure(compile_context, input, _kernel->border_size(), info.border_mode, info.constant_border_value);
}
- _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode_to_use, info.constant_border_value);
}
-void CLScale::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value,
- SamplingPolicy sampling_policy, bool use_padding, bool align_corners)
+Status CLScale::validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info)
{
- configure(compile_context, input, output, ScaleKernelInfo{ policy, border_mode, constant_border_value, sampling_policy, use_padding, align_corners });
+ return CLScaleKernel::validate(input, output, info);
}
-Status CLScale::validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy,
- bool use_padding, bool align_corners)
+void CLScale::run()
{
- return CLScale::validate(input, output, ScaleKernelInfo{ policy, border_mode, constant_border_value, sampling_policy, use_padding, align_corners });
+ if(!_kernel->border_size().empty())
+ {
+ CLScheduler::get().enqueue(*_border_handler, false);
+ }
+ CLScheduler::get().enqueue(*_kernel);
}
-Status CLScale::validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info)
-{
- return CLScaleKernel::validate(input, output, info);
-}
} // namespace arm_compute