author     Manuel Bottini <manuel.bottini@arm.com>    2020-10-07 17:17:16 +0100
committer  Sheri Zhang <sheri.zhang@arm.com>          2020-10-20 14:12:05 +0000
commit     ed2a8ed7efb54ae0ec539b8735820b2c30756c44 (patch)
tree       4de3adfd3fb8fa3d5a8b1df3f36e0db46d32a2b2
parent     68dd25fbe6e4d3c3513fa5993863419769aa08fc (diff)
download   ComputeLibrary-ed2a8ed7efb54ae0ec539b8735820b2c30756c44.tar.gz
COMPMID-3715: Remove OpenCL padding - CLElementwiseOperationKernel
Change-Id: I8a685d4ac5de747a0f775bd10be9c411cf394953
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4140
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
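
The change removes the right-hand padding previously required by CLElementwiseOperationKernel: instead of a fixed -DVEC_SIZE=16 plus a CLFillBorderKernel, the kernels now receive per-tensor vector sizes and a -DVEC_SIZE_LEFTOVER value, compute a per-work-item X offset, and store partial vectors with STORE_VECTOR_SELECT. The standalone C++ sketch below (illustrative width and data type, not part of the commit) traces that offset arithmetic for an output width that is not a multiple of the vector size:

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        // Illustrative values only: an F32 output of width 27.
        const unsigned int width    = 27;                 // output dimension(0)
        const unsigned int vec_size = 16 / sizeof(float); // vector_size_byte_opencl / element_size
        const unsigned int leftover = width % vec_size;   // what -DVEC_SIZE_LEFTOVER receives

        // calculate_max_window(output, Steps(vec_size)) launches ceil(width / vec_size) work-items on X.
        const unsigned int num_work_items = (width + vec_size - 1) / vec_size;

        for(unsigned int gid = 0; gid < num_work_items; ++gid)
        {
            // Same expression as the *_x_offs computation in the kernels in this patch.
            const int x_offs = std::max(static_cast<int>(gid * vec_size) - static_cast<int>((vec_size - leftover) % vec_size), 0);

            // STORE_VECTOR_SELECT writes only 'leftover' elements for the first
            // work-item when leftover != 0; every other work-item writes a full vector.
            const unsigned int stored = (leftover != 0 && gid == 0) ? leftover : vec_size;

            std::printf("gid=%u x_offs=%d stores elements [%d, %u)\n", gid, x_offs, x_offs, x_offs + stored);
        }
        return 0;
    }

For a width of 27 and a vector size of 4, work-item 0 stores elements [0, 3) and work-items 1..6 store full vectors starting at 3, 7, 11, 15, 19 and 23, so no access ever goes past the tensor and no padding is needed.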
-rw-r--r--  arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h  |  1
-rw-r--r--  arm_compute/runtime/CL/functions/CLElementwiseOperations.h  | 22
-rw-r--r--  arm_compute/runtime/CL/functions/CLPReluLayer.h             |  3
-rw-r--r--  src/core/CL/cl_kernels/elementwise_operation.cl             | 49
-rw-r--r--  src/core/CL/cl_kernels/elementwise_operation_quantized.cl   | 63
-rw-r--r--  src/core/CL/kernels/CLElementwiseOperationKernel.cpp        | 50
-rw-r--r--  src/runtime/CL/functions/CLElementwiseOperations.cpp        | 62
-rw-r--r--  src/runtime/CL/functions/CLPReluLayer.cpp                   | 37
-rw-r--r--  tests/validation/CL/ArithmeticAddition.cpp                  | 20
-rw-r--r--  tests/validation/CL/ArithmeticDivision.cpp                  |  5
-rw-r--r--  tests/validation/CL/ArithmeticSubtraction.cpp               |  5
-rw-r--r--  tests/validation/CL/ElementwiseMax.cpp                      |  5
-rw-r--r--  tests/validation/CL/ElementwiseMin.cpp                      |  5
-rw-r--r--  tests/validation/CL/ElementwisePower.cpp                    |  5
-rw-r--r--  tests/validation/CL/ElementwiseSquaredDiff.cpp              |  5
-rw-r--r--  tests/validation/CL/PReluLayer.cpp                          |  5
16 files changed, 107 insertions, 235 deletions
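
On the host side, generate_build_options_with_arithmetic_rules() (see src/core/CL/kernels/CLElementwiseOperationKernel.cpp below) now derives these macros per configure() call. A rough sketch of that derivation, using an illustrative stand-in for adjust_vec_size() (the real helper's reduction logic differs, but the result is the same for these values):

    #include <algorithm>
    #include <cstdio>

    // Illustrative stand-in for arm_compute::adjust_vec_size().
    static unsigned int adjust_vec_size(unsigned int preferred, unsigned int dim0)
    {
        return std::min(preferred, dim0);
    }

    int main()
    {
        const unsigned int element_size = 4;  // F32
        const unsigned int in1_dim0     = 1;  // input broadcast along X
        const unsigned int in2_dim0     = 27;
        const unsigned int out_dim0     = 27;

        // vector_size_byte_opencl = 16, as in the diff below.
        const unsigned int vec = adjust_vec_size(16 / element_size, out_dim0);

        std::printf("-DVEC_SIZE_IN1=%u\n", in1_dim0 == 1 ? 1u : vec);
        std::printf("-DVEC_SIZE_IN2=%u\n", in2_dim0 == 1 ? 1u : vec);
        std::printf("-DVEC_SIZE_OUT=%u\n", vec);
        std::printf("-DVEC_SIZE_LEFTOVER=%u\n", out_dim0 % vec);
        return 0;
    }

A broadcast input (dimension 0 equal to 1) gets VEC_SIZE_IN*=1, so the kernel loads a single element and splats it; that is what the #if VEC_SIZE_IN1 == 1 branches in the kernels handle.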
diff --git a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
index b459292161..80737cb8eb 100644
--- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
@@ -55,7 +55,6 @@ public:
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
protected:
/** The name of the operation */
diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
index 2d9d43863d..31d4f2e745 100644
--- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
+++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_CLELEMENTWISEOPERATIONS_H
#define ARM_COMPUTE_CLELEMENTWISEOPERATIONS_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
@@ -99,9 +98,6 @@ public:
// Inherited methods overridden:
void run(ITensorPack &tensors) override;
-
-private:
- CLFillBorderKernel _border_handler;
};
/** Basic function to run @ref CLSaturatedArithmeticOperationKernel for subtraction
@@ -169,9 +165,6 @@ public:
// Inherited methods overridden:
void run(ITensorPack &tensors) override;
-
-private:
- CLFillBorderKernel _border_handler;
};
/** Basic function to run @ref CLSaturatedArithmeticOperationKernel for division
@@ -208,9 +201,6 @@ public:
// Inherited methods overridden:
void run(ITensorPack &tensors) override;
-
-private:
- CLFillBorderKernel _border_handler;
};
/** Basic function to run @ref CLArithmeticOperationKernel for max
@@ -247,9 +237,6 @@ public:
// Inherited methods overridden:
void run(ITensorPack &tensors) override;
-
-private:
- CLFillBorderKernel _border_handler;
};
/** Basic function to run @ref CLArithmeticOperationKernel for min
@@ -286,9 +273,6 @@ public:
// Inherited methods overridden:
void run(ITensorPack &tensors) override;
-
-private:
- CLFillBorderKernel _border_handler;
};
/** Basic function to run @ref CLArithmeticOperationKernel for squared difference
@@ -325,9 +309,6 @@ public:
// Inherited methods overridden:
void run(ITensorPack &tensors) override;
-
-private:
- CLFillBorderKernel _border_handler;
};
/** Basic function to run @ref CLArithmeticOperationKernel for power
@@ -364,9 +345,6 @@ public:
// Inherited methods overridden:
void run(ITensorPack &tensors) override;
-
-private:
- CLFillBorderKernel _border_handler;
};
} // namespace experimental
diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h
index 84743508df..ffde9ec186 100644
--- a/arm_compute/runtime/CL/functions/CLPReluLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h
@@ -65,9 +65,6 @@ public:
// Inherited methods overridden:
void run(ITensorPack &tensors) override;
-
-private:
- CLFillBorderKernel _border_handler;
};
} // namespace experimental
diff --git a/src/core/CL/cl_kernels/elementwise_operation.cl b/src/core/CL/cl_kernels/elementwise_operation.cl
index 26826e9b8a..211eb38dca 100644
--- a/src/core/CL/cl_kernels/elementwise_operation.cl
+++ b/src/core/CL/cl_kernels/elementwise_operation.cl
@@ -43,7 +43,7 @@
#define OP_FUN_NAME_STR(op) elementwise_operation_##op
#define OP_FUN_NAME(op) OP_FUN_NAME_STR(op)
-#if defined(OP) && defined(DATA_TYPE_IN1) && defined(DATA_TYPE_IN2) && defined(DATA_TYPE_OUT) && defined(VEC_SIZE)
+#if defined(OP) && defined(VEC_SIZE_IN1) && defined(VEC_SIZE_IN2) && defined(VEC_SIZE_OUT) && defined(DATA_TYPE_IN1) && defined(DATA_TYPE_IN2) && defined(DATA_TYPE_OUT)
#if defined(ACTIVATION_TYPE)
#include "activation_float_helpers.h"
@@ -51,11 +51,12 @@
/** This function executes an element-wise operation among two tensors.
*
- * @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
+ * @note Vector sizes of inputs and output have to be passed at compile time using -DVEC_SIZE_IN1, -DVEC_SIZE_IN2, -DVEC_SIZE_OUT.
+ * @note Leftover vector size has to be passed at compile time using -DVEC_SIZE_LEFTOVER, e.g. -DVEC_SIZE_LEFTOVER=3. It is defined as the remainder of the output's first dimension divided by VEC_SIZE_OUT
+ * @note The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
* e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=short
- * @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used.
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @attention The element-wise operation to be executed has to be passed at compile time using -DOP (e.g., -DOP=ADD)
+ * @note To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used.
+ * @note The element-wise operation to be executed has to be passed at compile time using -DOP (e.g., -DOP=ADD)
*
* @param[in] in1_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32
* @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -87,24 +88,36 @@ __kernel void OP_FUN_NAME(OP)(
TENSOR3D_DECLARATION(in2),
TENSOR3D_DECLARATION(out))
{
+#if VEC_SIZE_IN1 == 1
+ uint in1_x_offs = 0;
+#else // VEC_SIZE_IN1 == 1
+ uint in1_x_offs = max((int)(get_global_id(0) * VEC_SIZE_IN1 - (VEC_SIZE_IN1 - VEC_SIZE_LEFTOVER) % VEC_SIZE_IN1), 0);
+#endif // VEC_SIZE_IN1 == 1
+#if VEC_SIZE_IN2 == 1
+ uint in2_x_offs = 0;
+#else // VEC_SIZE_IN2 == 1
+ uint in2_x_offs = max((int)(get_global_id(0) * VEC_SIZE_IN2 - (VEC_SIZE_IN2 - VEC_SIZE_LEFTOVER) % VEC_SIZE_IN2), 0);
+#endif // VEC_SIZE_IN2 == 1
+ uint out_x_offs = max((int)(get_global_id(0) * VEC_SIZE_OUT - (VEC_SIZE_OUT - VEC_SIZE_LEFTOVER) % VEC_SIZE_OUT), 0);
+
// Get pixels pointer
- Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
- Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
- Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
+ __global uchar *in1_addr = in1_ptr + in1_offset_first_element_in_bytes + in1_x_offs * sizeof(DATA_TYPE_IN1) + get_global_id(1) * in1_step_y + get_global_id(2) * in1_step_z;
+ __global uchar *in2_addr = in2_ptr + in2_offset_first_element_in_bytes + in2_x_offs * sizeof(DATA_TYPE_IN2) + get_global_id(1) * in2_step_y + get_global_id(2) * in2_step_z;
+ __global uchar *out_addr = out_ptr + out_offset_first_element_in_bytes + out_x_offs * sizeof(DATA_TYPE_OUT) + get_global_id(1) * out_step_y + get_global_id(2) * out_step_z;
// Load values
- VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)
- in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE));
- VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)
- in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE));
+ VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT)
+ in_a = CONVERT((VEC_DATA_TYPE(DATA_TYPE_IN1, VEC_SIZE_OUT))(VLOAD(VEC_SIZE_IN1)(0, (__global DATA_TYPE_IN1 *)in1_addr)), VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT));
+ VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT)
+ in_b = CONVERT((VEC_DATA_TYPE(DATA_TYPE_IN2, VEC_SIZE_OUT))(VLOAD(VEC_SIZE_IN2)(0, (__global DATA_TYPE_IN2 *)in2_addr)), VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT));
// Calculate and store result
+ VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT)
+ res0 = OP(in_a, in_b);
#if defined(ACTIVATION_TYPE)
- VSTORE(VEC_SIZE)
- (ACTIVATION(ACTIVATION_TYPE, DATA_TYPE_OUT, VEC_SIZE, CONVERT(OP(in_a, in_b), VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), A_VAL, B_VAL), 0, (__global DATA_TYPE_OUT *)out.ptr);
-#else // defined(ACTIVATION_TYPE)
- VSTORE(VEC_SIZE)
- (OP(in_a, in_b), 0, (__global DATA_TYPE_OUT *)out.ptr);
+ res0 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE_OUT, VEC_SIZE_OUT, res0, A_VAL, B_VAL);
#endif // defined(ACTIVATION_TYPE)
+
+ STORE_VECTOR_SELECT(res, DATA_TYPE_OUT, out_addr, VEC_SIZE_OUT, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
}
-#endif /* defined(DATA_TYPE_IN1) && defined(DATA_TYPE_IN2) && defined(DATA_TYPE_OUT) && defined(VEC_SIZE) */
+#endif /* defined(OP) && defined(VEC_SIZE_IN1) && defined(VEC_SIZE_IN2) && defined(VEC_SIZE_OUT) && defined(DATA_TYPE_IN1) && defined(DATA_TYPE_IN2) && defined(DATA_TYPE_OUT) */
diff --git a/src/core/CL/cl_kernels/elementwise_operation_quantized.cl b/src/core/CL/cl_kernels/elementwise_operation_quantized.cl
index eb57da828d..9edfb4f9a2 100644
--- a/src/core/CL/cl_kernels/elementwise_operation_quantized.cl
+++ b/src/core/CL/cl_kernels/elementwise_operation_quantized.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,25 +37,27 @@
#define OP_FUN_NAME_STR(op) elementwise_operation_##op##_quantized
#define OP_FUN_NAME(op) OP_FUN_NAME_STR(op)
-#if defined(OP) && defined(VEC_SIZE) && defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(DATA_TYPE_OUT)
+#if defined(OP) && defined(VEC_SIZE_IN1) && defined(VEC_SIZE_IN2) && defined(VEC_SIZE_OUT) && defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(DATA_TYPE_OUT)
-#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
-#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
-#define VEC_TYPE VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)
+#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE_OUT)
+#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE_OUT)
+#define VEC_TYPE VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT)
/** This function executes an element-wise operation among two tensors.
*
- * @attention The quantization offset of the first operand must be passed at compile time using -DOFFSET_IN1, i.e. -DOFFSET_IN1=10
- * @attention The quantization offset of the second operand must be passed at compile time using -DOFFSET_IN2, i.e. -DOFFSET_IN2=10
- * @attention The quantization offset of the output must be passed at compile time using -DOFFSET_OUT, i.e. -DOFFSET_OUT=10
- * @attention The quantization scale of the first operand must be passed at compile time using -DSCALE_IN1, i.e. -DSCALE_IN1=10
- * @attention The quantization scale of the second operand must be passed at compile time using -DSCALE_IN2, i.e. -DSCALE_IN2=10
- * @attention The quantization scale of the output must be passed at compile time using -DSCALE_OUT, i.e. -DSCALE_OUT=10
- * @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used.
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @attention The element-wise operation to be executed has to be passed at compile time using -DOP (e.g., -DOP=ADD)
- * @attention For QSYMM16 operations OFFSET_IN1, OFFSET_IN2 and OFFSET_OUT must be set to zero
- * @attention The data type must be passed at compile time using -DDATA_TYPE_OUT, i.e. -DDATA_TYPE_OUT=uchar
+ * @note Vector sizes of inputs and output have to be passed at compile time using -DVEC_SIZE_IN1, -DVEC_SIZE_IN2, -DVEC_SIZE_OUT.
+ * @note Leftover vector size has to be passed at compile time using -DVEC_SIZE_LEFTOVER, e.g. -DVEC_SIZE_LEFTOVER=3. It is defined as the remainder of the output's first dimension divided by VEC_SIZE_OUT
+ * @note In case of broadcasting along the X dimension the proper preprocessor argument should be passed depending on the input (e.g. -DIS_IN1_X_BROADCASTING, -DIS_IN2_X_BROADCASTING)
+ * @note The quantization offset of the first operand must be passed at compile time using -DOFFSET_IN1, i.e. -DOFFSET_IN1=10
+ * @note The quantization offset of the second operand must be passed at compile time using -DOFFSET_IN2, i.e. -DOFFSET_IN2=10
+ * @note The quantization offset of the output must be passed at compile time using -DOFFSET_OUT, i.e. -DOFFSET_OUT=10
+ * @note The quantization scale of the first operand must be passed at compile time using -DSCALE_IN1, i.e. -DSCALE_IN1=10
+ * @note The quantization scale of the second operand must be passed at compile time using -DSCALE_IN2, i.e. -DSCALE_IN2=10
+ * @note The quantization scale of the output must be passed at compile time using -DSCALE_OUT, i.e. -DSCALE_OUT=10
+ * @note To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used.
+ * @note The element-wise operation to be executed has to be passed at compile time using -DOP (e.g., -DOP=ADD)
+ * @note For QSYMM16 operations OFFSET_IN1, OFFSET_IN2 and OFFSET_OUT must be set to zero
+ * @note The data type must be passed at compile time using -DDATA_TYPE_OUT, i.e. -DDATA_TYPE_OUT=uchar
*
* @param[in] in1_ptr Pointer to the source tensor. Supported data types: QASYMM8/QSYMM16
* @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -87,13 +89,25 @@ __kernel void OP_FUN_NAME(OP)(
TENSOR3D_DECLARATION(in2),
TENSOR3D_DECLARATION(out))
{
+#if VEC_SIZE_IN1 == 1
+ uint in1_x_offs = 0;
+#else // VEC_SIZE_IN1 == 1
+ uint in1_x_offs = max((int)(get_global_id(0) * VEC_SIZE_IN1 - (VEC_SIZE_IN1 - VEC_SIZE_LEFTOVER) % VEC_SIZE_IN1), 0);
+#endif // VEC_SIZE_IN1 == 1
+#if VEC_SIZE_IN2 == 1
+ uint in2_x_offs = 0;
+#else // VEC_SIZE_IN2 == 1
+ uint in2_x_offs = max((int)(get_global_id(0) * VEC_SIZE_IN2 - (VEC_SIZE_IN2 - VEC_SIZE_LEFTOVER) % VEC_SIZE_IN2), 0);
+#endif // VEC_SIZE_IN2 == 1
+ uint out_x_offs = max((int)(get_global_id(0) * VEC_SIZE_OUT - (VEC_SIZE_OUT - VEC_SIZE_LEFTOVER) % VEC_SIZE_OUT), 0);
+
// Get pixels pointer
- Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
- Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
- Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
+ __global uchar *in1_addr = in1_ptr + in1_offset_first_element_in_bytes + in1_x_offs * sizeof(DATA_TYPE_OUT) + get_global_id(1) * in1_step_y + get_global_id(2) * in1_step_z;
+ __global uchar *in2_addr = in2_ptr + in2_offset_first_element_in_bytes + in2_x_offs * sizeof(DATA_TYPE_OUT) + get_global_id(1) * in2_step_y + get_global_id(2) * in2_step_z;
+ __global uchar *out_addr = out_ptr + out_offset_first_element_in_bytes + out_x_offs * sizeof(DATA_TYPE_OUT) + get_global_id(1) * out_step_y + get_global_id(2) * out_step_z;
- VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_OUT *)in1.ptr), VEC_INT);
- VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_OUT *)in2.ptr), VEC_INT);
+ VEC_INT in_a = CONVERT((VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT))(VLOAD(VEC_SIZE_IN1)(0, (__global DATA_TYPE_OUT *)in1_addr)), VEC_INT);
+ VEC_INT in_b = CONVERT((VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT))(VLOAD(VEC_SIZE_IN2)(0, (__global DATA_TYPE_OUT *)in2_addr)), VEC_INT);
in_a = SUB(in_a, (VEC_INT)((int)OFFSET_IN1));
in_b = SUB(in_b, (VEC_INT)((int)OFFSET_IN2));
@@ -101,10 +115,9 @@ __kernel void OP_FUN_NAME(OP)(
const VEC_FLOAT in1f32 = CONVERT(in_a, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN1);
const VEC_FLOAT in2f32 = CONVERT(in_b, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN2);
const VEC_FLOAT qresf32 = OP(in1f32, in2f32) / ((VEC_FLOAT)(float)SCALE_OUT) + ((VEC_FLOAT)((float)OFFSET_OUT));
- const VEC_TYPE res = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_TYPE);
+ const VEC_TYPE res0 = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_TYPE);
// Store result
- VSTORE(VEC_SIZE)
- (res, 0, (__global DATA_TYPE_OUT *)out.ptr);
+ STORE_VECTOR_SELECT(res, DATA_TYPE_OUT, out_addr, VEC_SIZE_OUT, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
}
-#endif /* defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(DATA_TYPE_OUT) */
+#endif /* defined(OP) && defined(VEC_SIZE_IN1) && defined(VEC_SIZE_IN2) && defined(VEC_SIZE_OUT) && defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(DATA_TYPE_OUT) */
diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
index f0712f5863..da28f3d886 100644
--- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
+++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
@@ -36,7 +36,7 @@ namespace arm_compute
{
namespace
{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
+constexpr unsigned int vector_size_byte_opencl = 16;
std::map<ArithmeticOperation, std::string> supported_arithmetic_ops =
{
@@ -152,10 +152,15 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &i
{
CLBuildOptions build_opts;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / output.element_size(), output.dimension(0));
+
build_opts.add_option("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1.data_type()));
build_opts.add_option("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2.data_type()));
build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output.data_type()));
- build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_IN1=" + support::cpp11::to_string(input1.dimension(0) == 1 ? 1 : num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_IN2=" + support::cpp11::to_string(input2.dimension(0) == 1 ? 1 : num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_OUT=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(output.dimension(0) % num_elems_processed_per_iteration));
build_opts.add_option("-DOP=" + operation_string);
if(is_data_type_quantized(input1.data_type()))
{
@@ -173,31 +178,17 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &i
return build_opts;
}
-std::pair<Status, Window> configure_window_arithmetic_common(const ValidRegion &valid_region, ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output)
+std::pair<Status, Window> configure_window_arithmetic_common(ITensorInfo &output)
{
- Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
- Window win_input1 = win.broadcast_if_dimension_le_one(input1);
- Window win_input2 = win.broadcast_if_dimension_le_one(input2);
-
- AccessWindowHorizontal input1_access(&input1, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal input2_access(&input2, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(&output, 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win_input1, input1_access)
- || update_window_and_padding(win_input2, input2_access)
- || update_window_and_padding(win, output_access);
-
- output_access.set_valid_region(win, valid_region);
-
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_pair(err, win);
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / output.element_size(), output.dimension(0));
+ Window win = calculate_max_window(output, Steps(num_elems_processed_per_iteration));
+ return std::make_pair(Status{}, win);
}
std::pair<Status, Window> validate_and_configure_window_for_arithmetic_operators(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output)
{
const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input1, input2);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
+ const TensorShape &out_shape = broadcast_pair.first;
set_shape_if_empty(output, out_shape);
@@ -226,16 +217,15 @@ std::pair<Status, Window> validate_and_configure_window_for_arithmetic_operators
set_data_type_if_unknown(output, DataType::QSYMM16);
}
- return configure_window_arithmetic_common(valid_region, input1, input2, output);
+ return configure_window_arithmetic_common(output);
}
std::pair<Status, Window> validate_and_configure_window_for_division(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output)
{
const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input1, input2);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
+ const TensorShape &out_shape = broadcast_pair.first;
auto_init_if_empty(output, out_shape, 1, input1.data_type());
- return configure_window_arithmetic_common(valid_region, input1, input2, output);
+ return configure_window_arithmetic_common(output);
}
} // namespace
@@ -315,30 +305,20 @@ void CLElementwiseOperationKernel::run_op(ITensorPack &tensors, const Window &wi
Window slice = collapsed.first_slice_window_3D();
Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed);
-
do
{
unsigned int idx = 0;
-
add_3D_tensor_argument(idx, src_0, slice_input1);
add_3D_tensor_argument(idx, src_1, slice_input2);
add_3D_tensor_argument(idx, dst, slice);
enqueue(queue, *this, slice, lws_hint());
-
ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input1));
ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input2));
}
while(collapsed.slide_window_slice_3D(slice));
}
-BorderSize CLElementwiseOperationKernel::border_size() const
-{
- const unsigned int replicateSize = _output->dimension(0) - std::min(_input1->dimension(0), _input2->dimension(0));
- const unsigned int border = std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
- return BorderSize{ 0, border, 0, 0 };
-}
-
/** Arithmetic operations with saturation*/
void CLSaturatedArithmeticOperationKernel::configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy,
diff --git a/src/runtime/CL/functions/CLElementwiseOperations.cpp b/src/runtime/CL/functions/CLElementwiseOperations.cpp
index 6f664725c5..7b4d3c629d 100644
--- a/src/runtime/CL/functions/CLElementwiseOperations.cpp
+++ b/src/runtime/CL/functions/CLElementwiseOperations.cpp
@@ -32,43 +32,9 @@
namespace arm_compute
{
-namespace
-{
-void configure_border_handler(const CLCompileContext &compile_context, CLFillBorderKernel &border_handler, BorderSize border_size, ITensorInfo *input1, ITensorInfo *input2, const ITensorInfo *output)
-{
- if(output->dimension(0) > 1)
- {
- ITensorInfo *broadcasted_info = (input1->dimension(0) == 1) ? input1 : input2;
-
- if(broadcasted_info->dimension(0) == 1)
- {
- border_handler.configure(compile_context, broadcasted_info, border_size, BorderMode::REPLICATE);
- }
- }
-}
-
-ITensorPack select_border_input(ITensorPack &tensors)
-{
- ITensorPack pack;
- if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1)
- {
- if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1)
- {
- pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1));
- }
- else
- {
- pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0));
- }
- }
- return pack;
-}
-} // namespace
-
namespace experimental
{
CLArithmeticAddition::CLArithmeticAddition()
- : _border_handler()
{
}
@@ -77,7 +43,6 @@ void CLArithmeticAddition::configure(const CLCompileContext &compile_context, IT
auto k = arm_compute::support::cpp14::make_unique<CLSaturatedArithmeticOperationKernel>();
k->configure(compile_context, ArithmeticOperation::ADD, input1, input2, output, policy, act_info);
_kernel = std::move(k);
- configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
}
Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
@@ -87,13 +52,10 @@ Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorIn
void CLArithmeticAddition::run(ITensorPack &tensors)
{
- auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(_border_handler, border_pack);
ICLOperator::run(tensors);
}
CLArithmeticSubtraction::CLArithmeticSubtraction()
- : _border_handler()
{
}
void CLArithmeticSubtraction::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy,
@@ -102,7 +64,6 @@ void CLArithmeticSubtraction::configure(const CLCompileContext &compile_context,
auto k = arm_compute::support::cpp14::make_unique<CLSaturatedArithmeticOperationKernel>();
k->configure(compile_context, ArithmeticOperation::SUB, input1, input2, output, policy, act_info);
_kernel = std::move(k);
- configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
}
Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
@@ -113,13 +74,10 @@ Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITenso
void CLArithmeticSubtraction::run(ITensorPack &tensors)
{
- auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(_border_handler, border_pack);
ICLOperator::run(tensors);
}
CLArithmeticDivision::CLArithmeticDivision()
- : _border_handler()
{
}
@@ -128,7 +86,6 @@ void CLArithmeticDivision::configure(const CLCompileContext &compile_context, IT
auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>();
k->configure(compile_context, ArithmeticOperation::DIV, input1, input2, output, act_info);
_kernel = std::move(k);
- configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
}
Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
@@ -138,13 +95,10 @@ Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorIn
void CLArithmeticDivision::run(ITensorPack &tensors)
{
- auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(_border_handler, border_pack);
ICLOperator::run(tensors);
}
CLElementwiseMax::CLElementwiseMax()
- : _border_handler()
{
}
@@ -153,7 +107,6 @@ void CLElementwiseMax::configure(const CLCompileContext &compile_context, ITenso
auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>();
k->configure(compile_context, ArithmeticOperation::MAX, input1, input2, output, act_info);
_kernel = std::move(k);
- configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
}
Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
@@ -163,13 +116,10 @@ Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *
void CLElementwiseMax::run(ITensorPack &tensors)
{
- auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(_border_handler, border_pack);
ICLOperator::run(tensors);
}
CLElementwiseMin::CLElementwiseMin()
- : _border_handler()
{
}
@@ -178,7 +128,6 @@ void CLElementwiseMin::configure(const CLCompileContext &compile_context, ITenso
auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>();
k->configure(compile_context, ArithmeticOperation::MIN, input1, input2, output, act_info);
_kernel = std::move(k);
- configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
}
Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
@@ -188,13 +137,10 @@ Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *
void CLElementwiseMin::run(ITensorPack &tensors)
{
- auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(_border_handler, border_pack);
ICLOperator::run(tensors);
}
CLElementwiseSquaredDiff::CLElementwiseSquaredDiff()
- : _border_handler()
{
}
@@ -203,7 +149,6 @@ void CLElementwiseSquaredDiff::configure(const CLCompileContext &compile_context
auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>();
k->configure(compile_context, ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info);
_kernel = std::move(k);
- configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
}
Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
@@ -213,13 +158,10 @@ Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITens
void CLElementwiseSquaredDiff::run(ITensorPack &tensors)
{
- auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(_border_handler, border_pack);
ICLOperator::run(tensors);
}
CLElementwisePower::CLElementwisePower()
- : _border_handler()
{
}
@@ -228,7 +170,6 @@ void CLElementwisePower::configure(const CLCompileContext &compile_context, ITen
auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>();
k->configure(compile_context, ArithmeticOperation::POWER, input1, input2, output, act_info);
_kernel = std::move(k);
- configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
}
Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
@@ -238,8 +179,6 @@ Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo
void CLElementwisePower::run(ITensorPack &tensors)
{
- auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(_border_handler, border_pack);
ICLOperator::run(tensors);
}
} // namespace experimental
@@ -477,7 +416,6 @@ struct CLElementwiseSquaredDiff::Impl
const ICLTensor *src_1{ nullptr };
ICLTensor *dst{ nullptr };
std::unique_ptr<experimental::CLElementwiseSquaredDiff> op{ nullptr };
- std::unique_ptr<CLFillBorderKernel> _border_handler{ nullptr };
};
CLElementwiseSquaredDiff::CLElementwiseSquaredDiff()
diff --git a/src/runtime/CL/functions/CLPReluLayer.cpp b/src/runtime/CL/functions/CLPReluLayer.cpp
index e03bd13284..aaddd46071 100644
--- a/src/runtime/CL/functions/CLPReluLayer.cpp
+++ b/src/runtime/CL/functions/CLPReluLayer.cpp
@@ -30,43 +30,9 @@
namespace arm_compute
{
-namespace
-{
-void configure_border_handler(const CLCompileContext &compile_context, CLFillBorderKernel &border_handler, BorderSize border_size, ITensorInfo *input1, ITensorInfo *input2, const ITensorInfo *output)
-{
- if(output->dimension(0) > 1)
- {
- ITensorInfo *broadcasted_info = (input1->dimension(0) == 1) ? input1 : input2;
-
- if(broadcasted_info->dimension(0) == 1)
- {
- border_handler.configure(compile_context, broadcasted_info, border_size, BorderMode::REPLICATE);
- }
- }
-}
-
-ITensorPack select_border_input(ITensorPack &tensors)
-{
- ITensorPack pack;
- if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1)
- {
- if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1)
- {
- pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1));
- }
- else
- {
- pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0));
- }
- }
- return pack;
-}
-} // namespace
-
namespace experimental
{
CLPReluLayer::CLPReluLayer()
- : _border_handler()
{
}
@@ -75,7 +41,6 @@ void CLPReluLayer::configure(const CLCompileContext &compile_context, ITensorInf
auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>();
k->configure(compile_context, ArithmeticOperation::PRELU, input, alpha, output);
_kernel = std::move(k);
- configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input, alpha, output);
}
Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
@@ -85,8 +50,6 @@ Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha
void CLPReluLayer::run(ITensorPack &tensors)
{
- auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(_border_handler, border_pack);
ICLOperator::run(tensors);
}
} // namespace experimental
diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp
index 93faa7e63a..2b158eabfb 100644
--- a/tests/validation/CL/ArithmeticAddition.cpp
+++ b/tests/validation/CL/ArithmeticAddition.cpp
@@ -78,23 +78,20 @@ TEST_SUITE(ArithmeticAddition)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Window shrink
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false, false})),
+ framework::dataset::make("Expected", { true, true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLArithmeticAddition::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS);
@@ -127,6 +124,21 @@ TEST_CASE(FusedActivation, framework::DatasetMode::ALL)
ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS);
}
+TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL)
+{
+ CLTensor src1 = create_tensor<CLTensor>(TensorShape(3U, 3U), DataType::F32, 1, QuantizationInfo());
+ CLTensor src2 = create_tensor<CLTensor>(TensorShape(3U, 3U), DataType::F32, 1, QuantizationInfo());
+ CLTensor dst = create_tensor<CLTensor>(TensorShape(3U, 3U), DataType::F32, 1, QuantizationInfo());
+
+ // Create and configure function
+ CLArithmeticAddition add;
+ add.configure(&src1, &src2, &dst, ConvertPolicy::WRAP);
+
+ validate(src1.info()->padding(), PaddingSize(0, 0, 0, 0));
+ validate(src2.info()->padding(), PaddingSize(0, 0, 0, 0));
+ validate(dst.info()->padding(), PaddingSize(0, 0, 0, 0));
+}
+
template <typename T>
using CLArithmeticAdditionFixture = ArithmeticAdditionValidationFixture<CLTensor, CLAccessor, CLArithmeticAddition, T>;
diff --git a/tests/validation/CL/ArithmeticDivision.cpp b/tests/validation/CL/ArithmeticDivision.cpp
index b927a8c892..36567dc02a 100644
--- a/tests/validation/CL/ArithmeticDivision.cpp
+++ b/tests/validation/CL/ArithmeticDivision.cpp
@@ -68,23 +68,20 @@ TEST_SUITE(ArithmeticDivision)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, false, false, false, false})),
+ framework::dataset::make("Expected", { true, false, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLArithmeticDivision::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
diff --git a/tests/validation/CL/ArithmeticSubtraction.cpp b/tests/validation/CL/ArithmeticSubtraction.cpp
index f66671b78b..2709fcaedb 100644
--- a/tests/validation/CL/ArithmeticSubtraction.cpp
+++ b/tests/validation/CL/ArithmeticSubtraction.cpp
@@ -81,23 +81,20 @@ TEST_SUITE(ArithmeticSubtraction)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Window shrink
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false, false})),
+ framework::dataset::make("Expected", { true, true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLArithmeticSubtraction::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS);
diff --git a/tests/validation/CL/ElementwiseMax.cpp b/tests/validation/CL/ElementwiseMax.cpp
index bf8bb413aa..b9444b2795 100644
--- a/tests/validation/CL/ElementwiseMax.cpp
+++ b/tests/validation/CL/ElementwiseMax.cpp
@@ -81,23 +81,20 @@ TEST_SUITE(ElementwiseMax)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Window shrink
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false, false})),
+ framework::dataset::make("Expected", { true, true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLElementwiseMax::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
diff --git a/tests/validation/CL/ElementwiseMin.cpp b/tests/validation/CL/ElementwiseMin.cpp
index 3b3d6a0639..8f53b241ab 100644
--- a/tests/validation/CL/ElementwiseMin.cpp
+++ b/tests/validation/CL/ElementwiseMin.cpp
@@ -81,23 +81,20 @@ TEST_SUITE(ElementwiseMin)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Window shrink
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false, false})),
+ framework::dataset::make("Expected", { true, true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLElementwiseMin::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
diff --git a/tests/validation/CL/ElementwisePower.cpp b/tests/validation/CL/ElementwisePower.cpp
index 2cafdbb84e..a2d3ba6c09 100644
--- a/tests/validation/CL/ElementwisePower.cpp
+++ b/tests/validation/CL/ElementwisePower.cpp
@@ -67,23 +67,20 @@ TEST_SUITE(ElementwisePower)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false, false})),
+ framework::dataset::make("Expected", { true, true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLElementwisePower::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
diff --git a/tests/validation/CL/ElementwiseSquaredDiff.cpp b/tests/validation/CL/ElementwiseSquaredDiff.cpp
index bdae81f1f6..0a4ab6627b 100644
--- a/tests/validation/CL/ElementwiseSquaredDiff.cpp
+++ b/tests/validation/CL/ElementwiseSquaredDiff.cpp
@@ -80,23 +80,20 @@ TEST_SUITE(ElementwiseSquaredDiff)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Window shrink
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false, false})),
+ framework::dataset::make("Expected", { true, true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLElementwiseSquaredDiff::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
diff --git a/tests/validation/CL/PReluLayer.cpp b/tests/validation/CL/PReluLayer.cpp
index ed5e57f669..82436a9671 100644
--- a/tests/validation/CL/PReluLayer.cpp
+++ b/tests/validation/CL/PReluLayer.cpp
@@ -72,23 +72,20 @@ TEST_SUITE(PReluLayer)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Window shrink
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false, false})),
+ framework::dataset::make("Expected", { true, true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLPReluLayer::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);