aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL')
-rw-r--r--src/core/CL/cl_kernels/convolution_layer.cl25
-rw-r--r--src/core/CL/cl_kernels/gemm.cl11
-rw-r--r--src/core/CL/kernels/CLCol2ImKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLIm2ColKernel.cpp8
6 files changed, 40 insertions, 14 deletions
diff --git a/src/core/CL/cl_kernels/convolution_layer.cl b/src/core/CL/cl_kernels/convolution_layer.cl
index a5cbe3d5c4..a875911140 100644
--- a/src/core/CL/cl_kernels/convolution_layer.cl
+++ b/src/core/CL/cl_kernels/convolution_layer.cl
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include "fixed_point.h"
#include "helpers.h"
/** This kernel reshapes the tensor's low three dimensions to single column
@@ -99,7 +100,7 @@ __kernel void reshape_to_columns(
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
* @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -148,17 +149,21 @@ __kernel void im2col_generic(
}
}
-#if defined(HAS_BIAS)
- *((__global DATA_TYPE *)output_ptr) = (DATA_TYPE)1;
-#endif /* HAS_BIAS */
+#ifdef HAS_BIAS
+#ifdef FIXED_POINT_POSITION
+ *((__global DATA_TYPE *)output_ptr) = (DATA_TYPE)(1 << FIXED_POINT_POSITION);
+#else // FIXED_POINT_POSITION
+ *((__global DATA_TYPE *)output_ptr) = 1.0f;
+#endif // FIXED_POINT_POSITION
+#endif // HAS_BIAS
}
-#endif //(CONVOLVED_WIDTH && STRIDE_X && STRIDE_Y && PAD_X && PAD_Y && KERNEL_WIDTH && KERNEL_HEIGHT && KERNEL_DEPTH && SRC_WIDTH && SRC_HEIGHT)
+#endif //defined(CONVOLVED_WIDTH) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_X) && defined(PAD_Y) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(KERNEL_DEPTH) && defined(SRC_WIDTH) && defined(SRC_HEIGHT)
/** This kernel performs a reshaping of the output of the convolution layer.
*
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -192,7 +197,7 @@ __kernel void col2im(
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
* @note In case biases will be added in late stage, -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -225,7 +230,11 @@ __kernel void im2col_reduced(
if(get_global_id(0) == (get_global_size(0) - 1) && get_global_id(1) == (get_global_size(1) - 1) && get_global_id(2) == (get_global_size(2) - 1))
{
tmp_out_ptr += dst_stride_x;
+#ifdef FIXED_POINT_POSITION
+ *((__global DATA_TYPE *)tmp_out_ptr) = (DATA_TYPE)(1 << FIXED_POINT_POSITION);
+#else // FIXED_POINT_POSITION
*((__global DATA_TYPE *)tmp_out_ptr) = (DATA_TYPE)1;
+#endif // FIXED_POINT_POSITION
}
-#endif /* HAS_BIAS */
+#endif // HAS_BIAS
}
diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl
index 46f1645aa7..db15720ad0 100644
--- a/src/core/CL/cl_kernels/gemm.cl
+++ b/src/core/CL/cl_kernels/gemm.cl
@@ -21,9 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "fixed_point.h"
#include "helpers.h"
+#ifdef FIXED_POINT_POSITION
+#include "fixed_point.h"
+#endif // FIXED_POINT_POSITION
+
/** This OpenCL kernel computes the "vector" 1x4 transposition of input matrix
*
* @param[in] src_ptr Pointer to the source matrix. Supported data types: U32/S32/F32
@@ -274,7 +277,11 @@ __kernel void gemm_accumulate_biases(
accum_value = vload16(0, (__global DATA_TYPE *)accum.ptr);
VEC_DATA_TYPE(DATA_TYPE, 16)
biases_value = vload16(0, (__global DATA_TYPE *)biases.ptr);
- accum_value = biases_value + accum_value;
+#ifdef FIXED_POINT_POSITION
+ accum_value = ADD_SAT_OP_EXPAND(biases_value, accum_value, DATA_TYPE, 16);
+#else // FIXED_POINT_POSITION
+ accum_value = biases_value + accum_value;
+#endif // FIXED_POINT_POSITION
// Store result in the accummulate buffer
vstore16(accum_value, 0, (__global DATA_TYPE *)accum.ptr);
diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp
index 679943ba3e..6b2a18b261 100644
--- a/src/core/CL/kernels/CLCol2ImKernel.cpp
+++ b/src/core/CL/kernels/CLCol2ImKernel.cpp
@@ -43,9 +43,9 @@ CLCol2ImKernel::CLCol2ImKernel()
void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
_input = input;
_output = output;
diff --git a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
index 75c1a6e629..a7ca6f2f01 100644
--- a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
@@ -53,6 +53,10 @@ void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTe
std::set<std::string> build_opts;
build_opts.insert(("-DDATA_TYPE=" + get_cl_type_from_data_type(accum->info()->data_type())));
+ if(accum->info()->data_type() == DataType::QS8)
+ {
+ build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(accum->info()->fixed_point_position()));
+ }
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemm_accumulate_biases", build_opts));
diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
index 73c8429055..27b215f2c8 100644
--- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
@@ -56,7 +56,7 @@ void CLGEMMTranspose1xWKernel::configure(const ICLTensor *input, ICLTensor *outp
ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
- const unsigned int num_elems_processed_per_iteration = max_cl_vector_width / data_size_from_type(input->info()->data_type());
+ const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
const float scale_x = num_elems_processed_per_iteration;
ARM_COMPUTE_ERROR_ON((0 == static_cast<int>(input->info()->dimension(0) * (1.f / scale_x))));
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 092f495f92..51922e0925 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -46,8 +46,9 @@ CLIm2ColKernel::CLIm2ColKernel()
void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
_input = input;
_output = output;
@@ -57,6 +58,11 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
build_opts.emplace((has_bias ? "-DHAS_BIAS" : ""));
+ if(input->info()->data_type() == DataType::QS8)
+ {
+ build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
+ }
+
int pad_x = 0;
int pad_y = 0;
int stride_x = 0;