author     Georgios Pinitas <georgios.pinitas@arm.com>   2018-08-24 11:25:32 +0100
committer  Anthony Barbier <anthony.barbier@arm.com>     2018-11-02 16:54:54 +0000
commit     c1a72451273ec019e3e74c4b53ea847afe8ddf7c (patch)
tree       b4bd62a7ccd22a2c60070d7fa23ceba794dcac5c /src
parent     6a8d3b6db13042a859972c33cf40cfeb6d7cfcda (diff)
download   ComputeLibrary-c1a72451273ec019e3e74c4b53ea847afe8ddf7c.tar.gz
COMPMID-1332: Implement Slice for CL
Change-Id: I0dbc4fd7f640d31daa1970eb3da0e941cb771f2b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/146145
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Diffstat (limited to 'src')
-rw-r--r--  src/core/CL/CLKernelLibrary.cpp                                                              |  6
-rw-r--r--  src/core/CL/cl_kernels/slice_ops.cl (renamed from src/core/CL/cl_kernels/strided_slice.cl)   | 46
-rw-r--r--  src/core/CL/kernels/CLStridedSliceKernel.cpp                                                 | 21
-rw-r--r--  src/core/utils/helpers/tensor_transform.cpp                                                  | 24
-rw-r--r--  src/runtime/CL/functions/CLSlice.cpp                                                         | 61
5 files changed, 140 insertions(+), 18 deletions(-)
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 29fd672a96..0cc6e320bf 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -361,7 +361,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "softmax_layer_max_shift_exp_sum_quantized_parallel", "softmax_layer_quantized.cl" },
{ "softmax_layer_max_shift_exp_sum_serial", "softmax_layer.cl" },
{ "softmax_layer_max_shift_exp_sum_parallel", "softmax_layer.cl" },
- { "strided_slice", "strided_slice.cl" },
+ { "strided_slice", "slice_ops.cl" },
{ "suppress_non_maximum", "canny.cl" },
{ "tablelookup_U8", "tablelookup.cl" },
{ "tablelookup_S16", "tablelookup.cl" },
@@ -742,8 +742,8 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/softmax_layer_quantized.clembed"
},
{
- "strided_slice.cl",
-#include "./cl_kernels/strided_slice.clembed"
+ "slice_ops.cl",
+#include "./cl_kernels/slice_ops.clembed"
},
{
"tablelookup.cl",
diff --git a/src/core/CL/cl_kernels/strided_slice.cl b/src/core/CL/cl_kernels/slice_ops.cl
index 7c68fb9a07..bc3df47345 100644
--- a/src/core/CL/cl_kernels/strided_slice.cl
+++ b/src/core/CL/cl_kernels/slice_ops.cl
@@ -61,25 +61,47 @@ __kernel void strided_slice(
Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, SRC_DEPTH);
Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH);
- int offset_0 = 0;
- int offset_1 = 0;
- int offset_2 = 0;
- int offset_3 = 0;
+ int offset = 0;
- // Calculate offset
-#if defined(START_0) && defined(STRIDE_0)
- offset_0 = (int)START_0 + (int)get_global_id(0) * (int)STRIDE_0;
+ // Offset X
+#if defined(START_0) && defined(STRIDE_0) && defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+ // Check if access on width gets out of bounds
+ // If it does shift access vector to access elements within bounds
+ const int xi = (int)(get_global_id(0) * VEC_SIZE);
+ offset = (int)START_0 + min(xi, (int)LAST_ACCESSED_X);
+ input.ptr += offset * input_stride_x;
+ output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x;
+#elif defined(START_0) && defined(STRIDE_0)
+ offset = (int)START_0 + (int)get_global_id(0) * (int)STRIDE_0;
+ input.ptr += offset * input_stride_x;
#endif // defined(START_0) && defined(STRIDE_0)
+
+ // Offset Y
#if defined(START_1) && defined(STRIDE_1)
- offset_1 = (int)START_1 + (int)get_global_id(1) * (int)STRIDE_1;
+ offset = (int)START_1 + (int)get_global_id(1) * (int)STRIDE_1;
+ input.ptr += offset * input_stride_y;
#endif // defined(START_1) && defined(STRIDE_1)
+
+ // Offset Z
#if defined(START_2) && defined(STRIDE_2)
- offset_2 = (int)START_2 + ((int)get_global_id(2) % (int)DST_DEPTH) * (int)STRIDE_2;
+ offset = (int)START_2 + ((int)get_global_id(2) % (int)DST_DEPTH) * (int)STRIDE_2;
+ input.ptr += offset * input_stride_z;
#endif // defined(START_2) && defined(STRIDE_2)
+
+ // Offset depth
#if defined(START_3) && defined(STRIDE_3)
- offset_3 = (int)START_3 + ((int)get_global_id(2) / (int)DST_DEPTH) * (int)STRIDE_3;
-#endif // defined(START_2) && defined(STRIDE_2)
+ offset = (int)START_3 + ((int)get_global_id(2) / (int)DST_DEPTH) * (int)STRIDE_3;
+ input.ptr += offset * input_stride_w;
+#endif // defined(START_3) && defined(STRIDE_3)
// Store result
- *((__global DATA_TYPE *)(output.ptr)) = *((__global DATA_TYPE *)tensor4D_offset(&input, offset_0, offset_1, offset_2, offset_3));
+#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ val = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input.ptr));
+
+ VSTORE(VEC_SIZE)
+ (val, 0, (__global DATA_TYPE *)(output.ptr));
+#else // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+ *((__global DATA_TYPE *)(output.ptr)) = *((__global DATA_TYPE *)(input.ptr));
+#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
}
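
Note on the rewritten kernel: when VEC_SIZE and LAST_ACCESSED_X are defined, the X offset is clamped so that the last vector access never reads past the end of the row, and the output pointer is shifted back by the same amount, so the final (overlapping) vector store still lands on the correct elements. A minimal host-side sketch of that clamping arithmetic in C++, with hypothetical sizes and START_0 ignored for brevity:

    #include <algorithm>
    #include <cstdio>

    // Sketch of the clamped vector access used when VEC_SIZE/LAST_ACCESSED_X
    // are defined. Sizes are made up; this is not library code.
    int main()
    {
        const int vec_size        = 4;                                    // elements per vector access
        const int output_width_x  = 10;                                   // output row length in elements
        const int last_accessed_x = std::max(output_width_x - vec_size, 0); // 6

        // Work-items cover the padded window: ceil(output_width_x / vec_size) positions along X.
        for(int gid = 0; gid * vec_size < output_width_x; ++gid)
        {
            const int xi       = gid * vec_size;
            const int in_x     = std::min(xi, last_accessed_x);     // clamped input start (START_0 would be added here)
            const int out_back = std::max(xi - last_accessed_x, 0); // shift the output back by the same amount
            std::printf("work-item %d: reads [%d, %d), writes from x=%d\n",
                        gid, in_x, in_x + vec_size, xi - out_back);
        }
        return 0;
    }
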
diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp
index f07436ac60..2d2ba103e5 100644
--- a/src/core/CL/kernels/CLStridedSliceKernel.cpp
+++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp
@@ -55,10 +55,10 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
ARM_COMPUTE_RETURN_ERROR_ON(starts.num_dimensions() > input->num_dimensions());
ARM_COMPUTE_RETURN_ERROR_ON(ends.num_dimensions() > input->num_dimensions());
ARM_COMPUTE_RETURN_ERROR_ON(strides.num_dimensions() > input->num_dimensions());
- for(unsigned int i = 0; i < strides.num_dimensions(); ++i)
+ ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(strides.cbegin(), strides.cbegin() + strides.num_dimensions(), [](int i)
{
- ARM_COMPUTE_RETURN_ERROR_ON(strides[i] == 0);
- }
+ return i == 0;
+ }));
// Get expected output shape
const TensorShape exp_output_shape = arm_compute::misc::shape_calculator::compute_strided_slice_shape(*input,
@@ -120,6 +120,19 @@ void CLStridedSliceKernel::configure(const ICLTensor *input, ICLTensor *output,
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+
+ // Enable multiple elements processing along x if stride_x is 1 and output width greater than the access vector size
+ const int vec_size_x = 16 / input->info()->element_size();
+ const int output_width_x = output->info()->tensor_shape().x();
+ const bool multi_access_x = (final_strides.x() == 1) && (output_width_x / vec_size_x > 0);
+
+ // Update window if needed
+ if(multi_access_x)
+ {
+ Window &updated_window = std::get<1>(win_config);
+ updated_window.set(Window::DimX,
+ Window::Dimension(updated_window.x().start(), ceil_to_multiple(updated_window.x().end(), vec_size_x), vec_size_x));
+ }
ICLKernel::configure_internal(win_config.second);
// Create build options
@@ -130,6 +143,8 @@ void CLStridedSliceKernel::configure(const ICLTensor *input, ICLTensor *output,
build_opts.add_option("-DSTART_" + support::cpp11::to_string(i) + "=" + support::cpp11::to_string(starts_abs[i]));
build_opts.add_option("-DSTRIDE_" + support::cpp11::to_string(i) + "=" + support::cpp11::to_string(final_strides[i]));
}
+ build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+ build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
build_opts.add_option_if_else(input_shape.num_dimensions() > 2,
"-DSRC_DEPTH=" + support::cpp11::to_string(input_shape.z()),
"-DSRC_DEPTH=1");
diff --git a/src/core/utils/helpers/tensor_transform.cpp b/src/core/utils/helpers/tensor_transform.cpp
index 5c83a8bdb5..a4bce5da5a 100644
--- a/src/core/utils/helpers/tensor_transform.cpp
+++ b/src/core/utils/helpers/tensor_transform.cpp
@@ -29,6 +29,30 @@ namespace helpers
{
namespace tensor_transform
{
+Coordinates slice_absolute_end_coords(TensorShape input_shape, Coordinates ends)
+{
+ // Create end mask
+ int32_t end_mask = 0;
+ for(unsigned int i = 0; i < ends.num_dimensions(); ++i)
+ {
+ if(ends[i] < 0)
+ {
+ end_mask |= 1 << i;
+ }
+ }
+ // Get unit strides
+ const BiStrides unit_strides = strided_slice_strides(input_shape, BiStrides());
+
+ return strided_slice_absolute_end_coords(input_shape, Coordinates(), ends, unit_strides, end_mask);
+}
+
+TensorShape compute_slice_output_shape(TensorShape input_shape, Coordinates starts, Coordinates ends_abs)
+{
+ // Get unit strides
+ const BiStrides unit_strides = strided_slice_strides(input_shape, BiStrides());
+ return compute_strided_slice_output_shape(input_shape, starts, ends_abs, unit_strides);
+}
+
Coordinates strided_slice_absolute_start_coords(TensorShape input_shape, Coordinates starts, Coordinates strides, int32_t begin_mask)
{
Coordinates starts_abs;
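
The new slice_absolute_end_coords() maps Slice-style ends, where a negative value means "up to the end of that dimension", onto the existing strided-slice helpers: each negative end sets the matching bit of an end mask, and the coordinates are then resolved with unit strides. A rough sketch of the intended mapping, assuming a masked end resolves to the dimension size and using std::vector in place of the library's Coordinates/TensorShape types:

    #include <cstdio>
    #include <vector>

    // Sketch of slice-style end resolution: ends[i] < 0 means "to the end of dimension i".
    std::vector<int> slice_absolute_end_coords(const std::vector<int> &shape, std::vector<int> ends)
    {
        for(std::size_t i = 0; i < ends.size(); ++i)
        {
            if(ends[i] < 0)
            {
                ends[i] = shape[i]; // equivalent to setting bit i of the end mask
            }
        }
        return ends;
    }

    int main()
    {
        const std::vector<int> shape = { 10, 7, 3 };
        const std::vector<int> ends  = { 4, -1, -1 };
        const std::vector<int> abs_e = slice_absolute_end_coords(shape, ends);
        std::printf("absolute ends: (%d, %d, %d)\n", abs_e[0], abs_e[1], abs_e[2]); // (4, 7, 3)
        return 0;
    }
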
diff --git a/src/runtime/CL/functions/CLSlice.cpp b/src/runtime/CL/functions/CLSlice.cpp
new file mode 100644
index 0000000000..bef7eca71c
--- /dev/null
+++ b/src/runtime/CL/functions/CLSlice.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLSlice.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/helpers/tensor_transform.h"
+#include "support/ToolchainSupport.h"
+
+namespace arm_compute
+{
+void CLSlice::configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input);
+
+ // Get absolute end coordinates
+ const Coordinates ends_abs = arm_compute::helpers::tensor_transform::slice_absolute_end_coords(input->info()->tensor_shape(), ends);
+
+ auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceKernel>();
+ k->configure(input, output, starts, ends_abs, BiStrides(), 0, 0, 0);
+ _kernel = std::move(k);
+}
+
+Status CLSlice::validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
+
+ // Check start dimensions for being non-negative
+ ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(starts.cbegin(), starts.cbegin() + starts.num_dimensions(), [](int i)
+ {
+ return i < 0;
+ }));
+
+ // Get absolute end coordinates
+ const Coordinates ends_abs = arm_compute::helpers::tensor_transform::slice_absolute_end_coords(input->tensor_shape(), ends);
+
+ return CLStridedSliceKernel::validate(input, output, starts, ends_abs, BiStrides(), 0, 0, 0);
+}
+} // namespace arm_compute
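
CLSlice itself is a thin wrapper: it converts the end coordinates to absolute values and configures a CLStridedSliceKernel with unit strides and empty masks. A hedged usage sketch, assuming CLSlice exposes the usual simple-function run() entry point and that the output tensor is auto-initialised during configure() (shapes here are made up):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLSlice.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // 8x6x4 F32 input; slice x in [1, 5), y in [0, 6) (end -1), z in [2, 4) (end -1).
        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 6U, 4U), 1, DataType::F32));

        CLSlice slice;
        slice.configure(&src, &dst, Coordinates(1, 0, 2), Coordinates(5, -1, -1));

        src.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill src via map()/unmap() ...
        slice.run();
        return 0;
    }
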