aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2018-02-16 12:42:16 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:47:18 +0000
commita086a0a4ddf1bbe17d532cc30be981b51034311e (patch)
treebe3d979742432670d05aaf3f79e843bd05c7ac12
parentde5a1cc7e5c929b19fb1d3ed7d0d8783b9ac6860 (diff)
downloadComputeLibrary-a086a0a4ddf1bbe17d532cc30be981b51034311e.tar.gz
COMPMID-765 Move direct convolution output stage to the right file
Change-Id: Ifb4d27ba05aa618babb79b1f8e95fbfa689c5f3a Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120792 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r--src/core/CL/CLKernelLibrary.cpp2
-rw-r--r--src/core/CL/cl_kernels/depthwise_convolution_quantized.cl62
-rw-r--r--src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl62
3 files changed, 63 insertions, 63 deletions
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 4e090debc8..e9ee23f4d7 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -294,7 +294,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "NV21_to_RGB888_bt709", "color_convert.cl" },
{ "NV21_to_RGBA8888_bt709", "color_convert.cl" },
{ "NV21_to_YUV444_bt709", "color_convert.cl" },
- { "output_stage_quantized", "depthwise_convolution_quantized.cl" },
+ { "output_stage_quantized", "direct_convolution_1x1_3x3_5x5_quantized.cl" },
{ "permute_201", "permute.cl" },
{ "permute_120", "permute.cl" },
{ "permute_3201", "permute.cl" },
diff --git a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
index cd7f4f83d5..e4345817fc 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
@@ -254,64 +254,4 @@ __kernel void depthwise_convolution_3x3_quantized(
vstore8(pixels, 0, dst.ptr);
}
-#endif //defined(CONV_STRIDE_X)
-
-/** This function computes the output stage of a depthwise convolution.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: QASYMM8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_ptr Pointer to the destination tensor. Supported data types: QASYMM8
- * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] dst_step_z dst_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] bias_ptr (Optional) Pointer to the biases vector. Supported data types: S32
- * @param[in] bias_stride_x (Optional) Stride of the biases vector in X dimension (in bytes)
- * @param[in] bias_step_x (Optional) bias_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the biases vector
- * @param[in] output_offset Quantized offset of zero point of the output tensor data range
- * @param[in] output_multiplier Output scale multiplier
- * @param[in] output_shift Output scale divisor exponent
- */
-
-__kernel void output_stage_quantized(
- TENSOR3D_DECLARATION(src),
- TENSOR3D_DECLARATION(dst),
-#if defined(HAS_BIAS)
- VECTOR_DECLARATION(bias),
-#endif //defined(HAS_BIAS)
- int output_offset,
- int output_multiplier,
- int output_shift)
-{
- Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst);
-#if defined(HAS_BIAS)
- Vector bias = CONVERT_TO_VECTOR_STRUCT_NO_STEP(bias);
-#endif //defined(HAS_BIAS)
-
- // Load input
- int16 vals = vload16(0, (__global int *)(src.ptr));
-
-#if defined(HAS_BIAS)
- // Load and add bias
- int bias_value = *((__global int *)(vector_offset(&bias, get_global_id(2))));
- vals += (int16)(bias_value);
-#endif //defined(HAS_BIAS)
-
- vals = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(vals, output_multiplier, output_shift, 16);
- vals = vals + output_offset;
- vals = clamp(vals, 0, 255);
-
- // Store result in dst
- vstore16(convert_uchar16(vals), 0, (__global uchar *)dst.ptr);
-} \ No newline at end of file
+#endif //defined(CONV_STRIDE_X) \ No newline at end of file
diff --git a/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl b/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl
index d0cf032ac1..533bc75bcb 100644
--- a/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl
+++ b/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -247,3 +247,63 @@ __kernel void direct_convolution_1x1_3x3_5x5_quantized(
vstore8(convert_uchar8_sat(pixels0), 0, (__global uchar *)dst.ptr);
}
#endif // defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH)
+
+/** This function computes the output stage of a depthwise convolution.
+ *
+ * @param[in] src_ptr Pointer to the source image. Supported data types: QASYMM8
+ * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes)
+ * @param[in] dst_ptr Pointer to the destination tensor. Supported data types: QASYMM8
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Y processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] bias_ptr (Optional) Pointer to the biases vector. Supported data types: S32
+ * @param[in] bias_stride_x (Optional) Stride of the biases vector in X dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the biases vector
+ * @param[in] output_offset Quantized offset of zero point of the output tensor data range
+ * @param[in] output_multiplier Output scale multiplier
+ * @param[in] output_shift Output scale divisor exponent
+ */
+
+__kernel void output_stage_quantized(
+ TENSOR3D_DECLARATION(src),
+ TENSOR3D_DECLARATION(dst),
+#if defined(HAS_BIAS)
+ VECTOR_DECLARATION(bias),
+#endif //defined(HAS_BIAS)
+ int output_offset,
+ int output_multiplier,
+ int output_shift)
+{
+ Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src);
+ Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst);
+#if defined(HAS_BIAS)
+ Vector bias = CONVERT_TO_VECTOR_STRUCT_NO_STEP(bias);
+#endif //defined(HAS_BIAS)
+
+ // Load input
+ int16 vals = vload16(0, (__global int *)(src.ptr));
+
+#if defined(HAS_BIAS)
+ // Load and add bias
+ int bias_value = *((__global int *)(vector_offset(&bias, get_global_id(2))));
+ vals += (int16)(bias_value);
+#endif //defined(HAS_BIAS)
+
+ vals = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(vals, output_multiplier, output_shift, 16);
+ vals = vals + output_offset;
+ vals = clamp(vals, 0, 255);
+
+ // Store result in dst
+ vstore16(convert_uchar16(vals), 0, (__global uchar *)dst.ptr);
+}