7 files changed, 40 insertions, 28 deletions
diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
index 85deeaef37..da786d71df 100644
--- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
@@ -60,7 +60,8 @@ public:
      * @param[in]  weights   Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
      *                       The 3rd dimension must be the same as the input's volume 3rd dimension.
      *                       Data type supported:Same as @p input.
-     * @param[in]  biases    Biases tensor. Biases are 1D tensor with dimension [OFM]. Data type supported: Same as @p input.
+     * @param[in]  biases    Biases tensor. Biases are 1D tensor with dimension [OFM].
+     *                       Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type
      * @param[out] output    Output tensor.
      *                       The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
      * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index c2a55e4bfb..45fd0c7365 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
@@ -47,7 +47,8 @@ public:
      *                       while every optional dimension from 4 and above represent a batch of inputs.
      *                       Data types supported: QASYMM8/QS8/QS16/F16/F32.
      * @param[in]  weights   Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
-     * @param[in]  biases    Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input.
+     * @param[in]  biases    Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+     *                       Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
      * @param[out] output    Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
      *                       Data types supported: Same as @p input.
      * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
diff --git a/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl b/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl
index c94f81e390..cbe826639d 100644
--- a/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl
+++ b/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl
@@ -176,7 +176,7 @@ inline uchar8 extract_input_stride3(__global const uchar *input_pixel)
  * @param[in] weights_stride_z                      Stride of the weights tensor in Z dimension (in bytes)
  * @param[in] weights_step_z                        weights_stride_z * number of elements along Z processed per workitem(in bytes)
  * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor
- * @param[in] biases_ptr                            Pointer to the biases tensor. Same as @p src_ptr
+ * @param[in] biases_ptr                            Pointer to the biases tensor. Supported data types: S32
  * @param[in] biases_stride_x                       Stride of the biases tensor in X dimension (in bytes)
  * @param[in] biases_step_x                         biases_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] biases_offset_first_element_in_bytes  The offset of the first element in the biases tensor
@@ -236,10 +236,9 @@ __kernel void direct_convolution_1x1_3x3_5x5_quantized(
     }
 
 #ifdef HAS_BIAS
-    Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
-    __global uchar *bias_addr = ((__global uchar *)(vector_offset(&biases, kernel_index)));
-    uchar8 bias_data = *bias_addr;
-    pixels0 += convert_int8(bias_data);
+    Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
+    __global int *bias_addr = ((__global int *)(vector_offset(&biases, kernel_index)));
+    pixels0 += (int8)(*bias_addr);
 #endif /* defined(HAS_BIAS) */
 
     pixels0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(pixels0, output_multiplier, output_shift, 8);
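The kernel hunk above switches the bias load from a uchar8 read to a single S32 value per output feature map, broadcast across the eight int32 accumulators. A scalar C++ model of the epilogue (a sketch only: the function name is hypothetical and the rounding inside ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE is simplified) shows why the bias has to be S32 rather than 8-bit: it is added to 32-bit accumulators before requantization.

#include <algorithm>
#include <cstdint>

// Hypothetical scalar model of the kernel epilogue. 'acc' already holds the
// int32 sum of (input + input_offset) * (weight + weight_offset) products.
uint8_t requantize(int32_t acc, int32_t bias_s32, int32_t output_multiplier, int32_t output_shift, int32_t output_offset)
{
    acc += bias_s32; // one S32 bias per OFM, broadcast over the whole output row in the kernel

    // Fixed-point scale, standing in for ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE
    // (the real macro uses a rounding-doubling high multiply plus a rounding shift).
    const int32_t scaled = static_cast<int32_t>((static_cast<int64_t>(acc) * output_multiplier) >> (31 + output_shift));

    // Re-apply the output quantization offset and clamp to the QASYMM8 range.
    return static_cast<uint8_t>(std::min(255, std::max(0, scaled + output_offset)));
}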
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 5f109f76af..d0b5b9373f 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -65,7 +65,14 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
     if(biases != nullptr)
     {
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
+        if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+        {
+            ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
+        }
+        else
+        {
+            ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
+        }
         ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3));
         ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
     }
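For reference, a minimal configure-time sketch of the contract this check enforces, assuming the public API of this era (the helper name, shapes, and quantization parameters are made up, and the TensorInfo overload taking a QuantizationInfo is assumed): a QASYMM8 input now pairs with an S32 bias tensor, while every other data type keeps matching the input.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"

using namespace arm_compute;

void configure_qasymm8_direct_conv() // hypothetical helper, not part of the patch
{
    CLScheduler::get().default_init(); // set up the CL context and queue

    CLTensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 8U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));
    bias.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::S32)); // S32, not QASYMM8
    dst.allocator()->init(TensorInfo(TensorShape(14U, 14U, 8U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));

    CLDirectConvolutionLayer conv;
    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 0, 0)); // 3x3 kernel, stride 1, no padding
}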
diff --git a/tests/validation/CPP/ConvolutionLayer.cpp b/tests/validation/CPP/ConvolutionLayer.cpp
index aa73869a0e..95852b0f42 100644
--- a/tests/validation/CPP/ConvolutionLayer.cpp
+++ b/tests/validation/CPP/ConvolutionLayer.cpp
@@ -48,8 +48,8 @@ inline bool is_valid_pixel(int i, int min, int max)
 }
 
 // 3D convolution for floating point type
-template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
-void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<T> &bias, SimpleTensor<T> &out,
+template < typename T, typename TB, typename std::enable_if < is_floating_point<T>::value &&is_floating_point<TB>::value, int >::type = 0 >
+void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
                    int i_offset, int w_offset, int b_offset, int o_offset,
                    int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights)
 {
@@ -95,8 +95,8 @@ void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, co
 }
 
 // 3D convolution for fixed point type
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<T> &bias, SimpleTensor<T> &out,
+template < typename T, typename TB, typename std::enable_if < std::is_integral<T>::value &&std::is_integral<TB>::value, int >::type = 0 >
+void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
                    int i_offset, int w_offset, int b_offset, int o_offset,
                    int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights)
 {
@@ -152,13 +152,13 @@ void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, co
 // 3D convolution for QASYMM8 type
 template <>
-void convolution3d(const SimpleTensor<uint8_t> &in, const SimpleTensor<uint8_t> &weights, const SimpleTensor<uint8_t> &bias, SimpleTensor<uint8_t> &out,
+void convolution3d(const SimpleTensor<uint8_t> &in, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &out,
                    int i_offset, int w_offset, int b_offset, int o_offset,
                    int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights)
 {
     const uint8_t *in_ptr  = in.data() + i_offset;
     const uint8_t *w_ptr   = weights.data() + w_offset;
-    const uint8_t *b_ptr   = bias.data() + b_offset;
+    const int32_t *b_ptr   = bias.data() + b_offset;
     uint8_t       *out_ptr = out.data() + o_offset;
 
     const int input_offset = -in.quantization_info().offset;
@@ -218,8 +218,8 @@ void convolution3d(const SimpleTensor<uint8_t> &in, const SimpleTensor<uint8_t>
     }
 }
 } // namespace
 
-template <typename T>
-SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &bias, const TensorShape &output_shape, const PadStrideInfo &info)
+template <typename T, typename TB>
+SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info)
 {
     // Create reference
     SimpleTensor<T> dst{ output_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
@@ -286,7 +286,7 @@ template SimpleTensor<qint8_t> convolution_layer(const SimpleTensor<qint8_t> &sr
                                                  const PadStrideInfo &info);
 template SimpleTensor<qint16_t> convolution_layer(const SimpleTensor<qint16_t> &src, const SimpleTensor<qint16_t> &weights, const SimpleTensor<qint16_t> &bias, const TensorShape &output_shape,
                                                   const PadStrideInfo &info);
-template SimpleTensor<uint8_t> convolution_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<uint8_t> &bias, const TensorShape &output_shape,
+template SimpleTensor<uint8_t> convolution_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape,
                                                  const PadStrideInfo &info);
 } // namespace reference
 } // namespace validation
diff --git a/tests/validation/CPP/ConvolutionLayer.h b/tests/validation/CPP/ConvolutionLayer.h
index 117e846b1c..57455ba401 100644
--- a/tests/validation/CPP/ConvolutionLayer.h
+++ b/tests/validation/CPP/ConvolutionLayer.h
@@ -35,8 +35,8 @@ namespace validation
 {
 namespace reference
 {
-template <typename T>
-SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &bias, const TensorShape &output_shape, const PadStrideInfo &info);
+template <typename T, typename TB>
+SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info);
 } // namespace reference
 } // namespace validation
 } // namespace test
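The reference implementation above now carries the bias element type TB independently of the tensor element type T, with SFINAE picking the float or integer overload and a full specialisation handling the QASYMM8 + S32 pairing. A stripped-down, self-contained sketch of the same dispatch pattern (hypothetical function name, std type traits standing in for the library's is_floating_point helper):

#include <cstdint>
#include <type_traits>

// Float path: enabled only when both the element type and the bias type are floating point.
template <typename T, typename TB, typename std::enable_if<std::is_floating_point<T>::value && std::is_floating_point<TB>::value, int>::type = 0>
T add_bias(T acc, TB bias)
{
    return acc + bias;
}

// Integer path: covers fixed point and the QASYMM8 case, where an int32 accumulator meets an S32 (int32_t) bias.
template <typename T, typename TB, typename std::enable_if<std::is_integral<T>::value && std::is_integral<TB>::value, int>::type = 0>
T add_bias(T acc, TB bias)
{
    return static_cast<T>(acc + bias);
}

int main()
{
    float   f = add_bias(1.0f, 0.5f);        // selects the floating-point overload
    int32_t q = add_bias(int32_t{100}, -20); // selects the integral overload, as in the QASYMM8 path
    return (f > 0.0f && q == 80) ? 0 : 1;
}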
diff --git a/tests/validation/fixtures/DirectConvolutionLayerFixture.h b/tests/validation/fixtures/DirectConvolutionLayerFixture.h
index e302657158..279a4897eb 100644
--- a/tests/validation/fixtures/DirectConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DirectConvolutionLayerFixture.h
@@ -44,6 +44,9 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
 class DirectConvolutionValidationGenericFixture : public framework::Fixture
 {
 public:
+    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
+
+public:
     template <typename...>
     void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels,
                DataType data_type, int fractional_bits, QuantizationInfo quantization_info)
@@ -55,10 +58,11 @@ public:
         const TensorShape   weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels);
         const TensorShape   bias_shape(num_kernels);
         const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR);
-        const TensorShape   output_shape = get_output_shape(input_shape, weights_shape, info);
+        const TensorShape   output_shape   = get_output_shape(input_shape, weights_shape, info);
+        const DataType      bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
 
-        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, fractional_bits, quantization_info);
-        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, fractional_bits, quantization_info);
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
     }
 
 protected:
@@ -86,12 +90,12 @@ protected:
     }
 
     TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
-                              DataType data_type, int fixed_point_position, QuantizationInfo quantization_info)
+                              DataType data_type, DataType bias_data_type, int fixed_point_position, QuantizationInfo quantization_info)
     {
         // Create tensors
         TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, fixed_point_position, quantization_info);
         TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, fixed_point_position, quantization_info);
-        TensorType bias    = create_tensor<TensorType>(bias_shape, data_type, 1, fixed_point_position, quantization_info);
+        TensorType bias    = create_tensor<TensorType>(bias_shape, bias_data_type, 1, fixed_point_position, quantization_info);
         TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, fixed_point_position, quantization_info);
 
         // Create and configure function
@@ -126,12 +130,12 @@ protected:
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
-                                      DataType data_type, int fixed_point_position, QuantizationInfo quantization_info)
+                                      DataType data_type, DataType bias_data_type, int fixed_point_position, QuantizationInfo quantization_info)
     {
         // Create reference
-        SimpleTensor<T> src{ input_shape, data_type, 1, fixed_point_position, quantization_info };
-        SimpleTensor<T> weights{ weights_shape, data_type, 1, fixed_point_position, quantization_info };
-        SimpleTensor<T> bias{ bias_shape, data_type, 1, fixed_point_position, quantization_info };
+        SimpleTensor<T>     src{ input_shape, data_type, 1, fixed_point_position, quantization_info };
+        SimpleTensor<T>     weights{ weights_shape, data_type, 1, fixed_point_position, quantization_info };
+        SimpleTensor<TBias> bias{ bias_shape, bias_data_type, 1, fixed_point_position, quantization_info };
 
         // Fill reference
         fill(src, 0);
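The TBias alias introduced in the fixture above maps the element type to the matching bias type at compile time, mirroring the runtime bias_data_type selection. A self-contained sketch of how it resolves (the alias body is copied from the hunk; the surrounding scaffolding and static_asserts are illustrative only):

#include <cstdint>
#include <type_traits>

// For a uint8_t (QASYMM8) element type the bias element type becomes int32_t;
// for everything else it stays equal to T.
template <typename T>
using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;

static_assert(std::is_same<TBias<uint8_t>, int32_t>::value, "QASYMM8 biases are S32");
static_assert(std::is_same<TBias<float>, float>::value, "float biases match the input type");
static_assert(std::is_same<TBias<int16_t>, int16_t>::value, "fixed-point biases match the input type");

int main() {}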