aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2020-10-23 14:24:26 +0100
committerMichele Di Giorgio <michele.digiorgio@arm.com>2020-10-23 15:38:25 +0000
commit4112eed70d110376674609af92e76c68ae8b3a39 (patch)
tree8b933e67ec3cfb9c17fbbb0dc84f8082871a3e82
parent69153b3744542691cc205bafda3bbc9c84d394ce (diff)
downloadComputeLibrary-4112eed70d110376674609af92e76c68ae8b3a39.tar.gz
COMPMID-3731 Remove OpenCL padding: CLHeightConcatenateLayerKernel
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Change-Id: I004128fdcc1207c25d2b959f17f04f9e1a8b4cb5 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4247 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
-rw-r--r--arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h1
-rw-r--r--arm_compute/core/Utils.h18
-rw-r--r--src/core/CL/cl_kernels/concatenate.cl23
-rw-r--r--src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp35
-rw-r--r--src/core/Utils.cpp27
5 files changed, 59 insertions, 45 deletions
diff --git a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h
index 4fa2b40881..f362441944 100644
--- a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h
@@ -72,7 +72,6 @@ public:
private:
unsigned int _height_offset;
- unsigned int _num_elems_processed_per_iteration;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 681a1a708e..1c02e89ab6 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -45,6 +45,7 @@
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Calculate the rounded up quotient of val / m.
*
@@ -1096,18 +1097,25 @@ std::string string_from_pixel_value(const PixelValue &value, const DataType data
DataType data_type_from_name(const std::string &name);
/** Stores padding information before configuring a kernel
*
+ * @param[in] infos list of tensor infos to store the padding info for
+ *
+ * @return An unordered map where each tensor info pointer is paired with its original padding info
+ */
+std::unordered_map<const ITensorInfo *, PaddingSize> get_padding_info(std::initializer_list<const ITensorInfo *> infos);
+/** Stores padding information before configuring a kernel
+ *
* @param[in] tensors list of tensors to store the padding info for
*
- * @return An unordered map where each tensor pointer is paired with its original padding info
+ * @return An unordered map where each tensor info pointer is paired with its original padding info
*/
-std::unordered_map<const ITensor *, PaddingSize> get_padding_info(std::initializer_list<const ITensor *> tensors);
+std::unordered_map<const ITensorInfo *, PaddingSize> get_padding_info(std::initializer_list<const ITensor *> tensors);
/** Check if the previously stored padding info has changed after configuring a kernel
*
- * @param[in] padding_map an unordered map where each tensor pointer is paired with its original padding info
+ * @param[in] padding_map an unordered map where each tensor info pointer is paired with its original padding info
*
- * @return true if any of the tensors has changed its paddings
+ * @return true if any of the tensor infos has changed its paddings
*/
-bool has_padding_changed(const std::unordered_map<const ITensor *, PaddingSize> &padding_map);
+bool has_padding_changed(const std::unordered_map<const ITensorInfo *, PaddingSize> &padding_map);
/** Input Stream operator for @ref DataType
*
diff --git a/src/core/CL/cl_kernels/concatenate.cl b/src/core/CL/cl_kernels/concatenate.cl
index 0b211a6d1f..0f4b5afe2c 100644
--- a/src/core/CL/cl_kernels/concatenate.cl
+++ b/src/core/CL/cl_kernels/concatenate.cl
@@ -330,6 +330,8 @@ __kernel void concatenate_width(
#endif /* defined(WIDTH_OFFSET) && defined(DEPTH) */
+#if defined(VEC_SIZE_LEFTOVER)
+
#if defined(HEIGHT_OFFSET) && defined(DEPTH) && defined(VEC_SIZE)
/** This kernel concatenates the input tensor into the output tensor along the second dimension
*
@@ -338,6 +340,7 @@ __kernel void concatenate_width(
* @note Vector sizes supported are 2,4,8 and 16.
* @note The offset for the second spatial dimension has to be passed at compile time using -DHEIGHT_OFFSET. i.e. -DHEIGHT_OFFSET=128
* @note Tensor depth should be given as a preprocessor argument using -DDEPTH=size. e.g. -DDEPTH=16
+ * @note Leftover vector size has to be passed at compile time using -DVEC_SIZE_LEFTOVER. e.g. -DVEC_SIZE_LEFTOVER=3. It is defined as the remainder between the input's first dimension and VEC_SIZE
*
* @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -365,26 +368,26 @@ __kernel void concatenate_height(
TENSOR4D_DECLARATION(src),
TENSOR4D_DECLARATION(dst))
{
- Tensor4D src = CONVERT_TO_TENSOR4D_STRUCT(src, DEPTH);
- Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT(dst, DEPTH);
+ const int x_offs = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0) * sizeof(DATA_TYPE);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x_offs + get_global_id(1) * src_stride_y + (get_global_id(2) % DEPTH) * src_stride_z + (get_global_id(
+ 2) / DEPTH) * src_stride_w;
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes + x_offs + get_global_id(1) * dst_stride_y + (get_global_id(2) % DEPTH) * dst_stride_z + (get_global_id(
+ 2) / DEPTH) * dst_stride_w;
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr);
+ source_values0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src_addr);
#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
- const VEC_QUANT out = requantize(source_values, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT);
- VSTORE(VEC_SIZE)
- (out, 0, (__global DATA_TYPE *)(dst.ptr + HEIGHT_OFFSET * dst_stride_y));
+ const VEC_QUANT out0 = requantize(source_values0, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT);
+ STORE_VECTOR_SELECT(out, DATA_TYPE, dst_addr + HEIGHT_OFFSET * dst_stride_y, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
#else /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
- VSTORE(VEC_SIZE)
- (source_values, 0, (__global DATA_TYPE *)(dst.ptr + HEIGHT_OFFSET * dst_stride_y));
+ STORE_VECTOR_SELECT(source_values, DATA_TYPE, dst_addr + HEIGHT_OFFSET * dst_stride_y, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
}
#endif /* defined(HEIGHT_OFFSET) && defined(DEPTH) */
-#if defined(VEC_SIZE_LEFTOVER)
-
/** This kernel concatenates the input tensor into the output tensor along the third dimension
*
* @note The data type has to be passed at compile time using -DDATA_TYPE. i.e. -DDATA_TYPE=float
diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
index 1ae2599721..3f5e91e5a1 100644
--- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
@@ -39,20 +39,6 @@ namespace arm_compute
{
namespace
{
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, unsigned int &num_elems_processed_per_iteration)
-{
- num_elems_processed_per_iteration = 4;
- // The window needs to be based on input as we copy all the heights of input
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- bool window_changed = update_window_and_padding(win, input_access, output_access);
-
- Window win_collapsed = win.collapse(win, Window::DimZ);
-
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_pair(err, win_collapsed);
-}
Status validate_arguments(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
@@ -72,15 +58,13 @@ Status validate_arguments(const ITensorInfo *input, unsigned int height_offset,
} // namespace
CLHeightConcatenateLayerKernel::CLHeightConcatenateLayerKernel()
- : _height_offset(0), _num_elems_processed_per_iteration()
+ : _height_offset(0)
{
}
Status CLHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output)
{
- unsigned int num_elems_processed_per_iteration;
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, height_offset, output));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration).first);
return Status{};
}
@@ -89,16 +73,19 @@ void CLHeightConcatenateLayerKernel::configure(const CLCompileContext &compile_c
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, height_offset, output));
- _height_offset = height_offset;
+ auto padding_info = get_padding_info({ input, output });
- auto win_config = validate_and_configure_window(input, output, _num_elems_processed_per_iteration);
+ _height_offset = height_offset;
// Add build options
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, input->dimension(0));
+
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->element_size()));
- build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(_num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DHEIGHT_OFFSET=" + support::cpp11::to_string(_height_offset));
build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->dimension(2)));
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration));
if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info())
{
@@ -115,12 +102,14 @@ void CLHeightConcatenateLayerKernel::configure(const CLCompileContext &compile_c
_kernel = create_kernel(compile_context, "concatenate_height", build_opts.options());
// Configure kernel window
- ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-
- ICLKernel::configure_internal(std::get<1>(win_config));
+ // The window needs to be based on input as we copy all the heights of input
+ Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+ ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
// Set output valid region
output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
+
+ ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
void CLHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp
index c877e8fd1f..babf1c4b91 100644
--- a/src/core/Utils.cpp
+++ b/src/core/Utils.cpp
@@ -495,26 +495,41 @@ std::pair<int32_t, int32_t> get_quantized_activation_min_max(ActivationLayerInfo
return std::make_pair(min_activation, max_activation);
}
-std::unordered_map<const ITensor *, PaddingSize> get_padding_info(std::initializer_list<const ITensor *> tensors)
+std::unordered_map<const ITensorInfo *, PaddingSize> get_padding_info(std::initializer_list<const ITensor *> tensors)
{
- std::unordered_map<const ITensor *, PaddingSize> res;
+ std::unordered_map<const ITensorInfo *, PaddingSize> res;
for(const ITensor *tensor : tensors)
{
if(tensor)
{
- res.insert({ tensor, tensor->info()->padding() });
+ res.insert({ tensor->info(), tensor->info()->padding() });
}
}
return res;
}
-bool has_padding_changed(const std::unordered_map<const ITensor *, PaddingSize> &padding_map)
+std::unordered_map<const ITensorInfo *, PaddingSize> get_padding_info(std::initializer_list<const ITensorInfo *> infos)
{
- return std::find_if(padding_map.begin(), padding_map.end(), [](const std::pair<const ITensor *, PaddingSize> &padding_info)
+ std::unordered_map<const ITensorInfo *, PaddingSize> res;
+
+ for(const ITensorInfo *info : infos)
+ {
+ if(info)
+ {
+ res.insert({ info, info->padding() });
+ }
+ }
+
+ return res;
+}
+
+bool has_padding_changed(const std::unordered_map<const ITensorInfo *, PaddingSize> &padding_map)
+{
+ return std::find_if(padding_map.begin(), padding_map.end(), [](const std::pair<const ITensorInfo *, PaddingSize> &padding_info)
{
- return (padding_info.first->info()->padding() != padding_info.second);
+ return (padding_info.first->padding() != padding_info.second);
})
!= padding_map.end();
}