author     Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>    2018-07-04 09:34:00 +0100
committer  Anthony Barbier <anthony.barbier@arm.com>                     2018-11-02 16:54:10 +0000
commit     7485d5a62685cb745ab50e970adb722cb71557ac (patch)
tree       ba01b99ca466c93edc9a3f8c1e34394ff84be060
parent     014333d73883c3872e458cedda5ccef586a7ccd4 (diff)
download   ComputeLibrary-7485d5a62685cb745ab50e970adb722cb71557ac.tar.gz
COMPMID-970 : Remove QS8 / QS16 support
Removed fixed point related code.

Change-Id: I487acf138dace3b0450e0d72ca7071eaec254566
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/137678
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
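For context (not part of the patch): with this commit the DataType::QS8 and DataType::QS16 enumerators and the fixed-point position argument disappear from the public API, so quantized tensors are declared as QASYMM8 with an explicit QuantizationInfo instead. The sketch below is a hedged illustration of the caller-side migration; the shape, scale and offset values are placeholder assumptions, not taken from the patch.

    // Sketch only: caller-side effect of dropping QS8/QS16 (values are illustrative).
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    TensorInfo make_quantized_input()
    {
        const TensorShape shape(224U, 224U, 3U);

        // Pre-patch fixed-point declaration (no longer compiles after this change):
        //   TensorInfo(shape, 1, DataType::QS8, 4 /* fixed_point_position */);

        // Post-patch: 8-bit asymmetric quantization with an explicit scale/offset.
        return TensorInfo(shape, 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10));
    }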
-rw-r--r--arm_compute/core/CL/kernels/CLActivationLayerKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h12
-rw-r--r--arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h14
-rw-r--r--arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLCol2ImKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h9
-rw-r--r--arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h8
-rw-r--r--arm_compute/core/CL/kernels/CLFillBorderKernel.h2
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h6
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLIm2ColKernel.h6
-rw-r--r--arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLPermuteKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h12
-rw-r--r--arm_compute/core/CL/kernels/CLPoolingLayerKernel.h5
-rw-r--r--arm_compute/core/CL/kernels/CLReshapeLayerKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h16
-rw-r--r--arm_compute/core/CL/kernels/CLTransposeKernel.h6
-rw-r--r--arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h4
-rw-r--r--arm_compute/core/CPP/kernels/CPPPermuteKernel.h4
-rw-r--r--arm_compute/core/FixedPoint.h373
-rw-r--r--arm_compute/core/FixedPoint.inl406
-rw-r--r--arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h2
-rw-r--r--arm_compute/core/Helpers.h24
-rw-r--r--arm_compute/core/Helpers.inl14
-rw-r--r--arm_compute/core/ITensorInfo.h15
-rw-r--r--arm_compute/core/NEON/NEFixedPoint.h1184
-rw-r--r--arm_compute/core/NEON/NEFixedPoint.inl1958
-rw-r--r--arm_compute/core/NEON/kernels/NEActivationLayerKernel.h17
-rw-r--r--arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h20
-rw-r--r--arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h20
-rw-r--r--arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h21
-rw-r--r--arm_compute/core/NEON/kernels/NECol2ImKernel.h4
-rw-r--r--arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h4
-rw-r--r--arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h2
-rw-r--r--arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h13
-rw-r--r--arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h8
-rw-r--r--arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h8
-rw-r--r--arm_compute/core/NEON/kernels/NEFillBorderKernel.h2
-rw-r--r--arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h2
-rw-r--r--arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h6
-rw-r--r--arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h4
-rw-r--r--arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h4
-rw-r--r--arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h4
-rw-r--r--arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h4
-rw-r--r--arm_compute/core/NEON/kernels/NEIm2ColKernel.h4
-rw-r--r--arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h16
-rw-r--r--arm_compute/core/NEON/kernels/NEPermuteKernel.h4
-rw-r--r--arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h24
-rw-r--r--arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h50
-rw-r--r--arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h2
-rw-r--r--arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h8
-rw-r--r--arm_compute/core/NEON/kernels/NETransposeKernel.h6
-rw-r--r--arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h4
-rw-r--r--arm_compute/core/NEON/kernels/detail/NEDirectConvolution3x3.h14
-rw-r--r--arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h249
-rw-r--r--arm_compute/core/SubTensorInfo.h11
-rw-r--r--arm_compute/core/TensorInfo.h54
-rw-r--r--arm_compute/core/Types.h2
-rw-r--r--arm_compute/core/Utils.h30
-rw-r--r--arm_compute/core/Validate.h156
-rw-r--r--arm_compute/graph/TypePrinter.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLActivationLayer.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLArithmeticAddition.h14
-rw-r--r--arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h16
-rw-r--r--arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLConvolutionLayer.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLCopy.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthConvertLayer.h10
-rw-r--r--arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLFillBorder.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLFlattenLayer.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMM.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h12
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMTranspose1xW.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLNormalizationLayer.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLPermute.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h12
-rw-r--r--arm_compute/runtime/CL/functions/CLPoolingLayer.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLReshapeLayer.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLSoftmaxLayer.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLTranspose.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h4
-rw-r--r--arm_compute/runtime/CPP/functions/CPPPermute.h6
-rw-r--r--arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h4
-rw-r--r--arm_compute/runtime/NEON/functions/NEActivationLayer.h4
-rw-r--r--arm_compute/runtime/NEON/functions/NEArithmeticAddition.h12
-rw-r--r--arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h14
-rw-r--r--arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h4
-rw-r--r--arm_compute/runtime/NEON/functions/NECol2Im.h6
-rw-r--r--arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h4
-rw-r--r--arm_compute/runtime/NEON/functions/NEConvolutionLayer.h6
-rw-r--r--arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h4
-rw-r--r--arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h14
-rw-r--r--arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h13
-rw-r--r--arm_compute/runtime/NEON/functions/NEFillBorder.h4
-rw-r--r--arm_compute/runtime/NEON/functions/NEFlattenLayer.h4
-rw-r--r--arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h8
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMM.h2
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h10
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h4
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h4
-rw-r--r--arm_compute/runtime/NEON/functions/NEIm2Col.h4
-rw-r--r--arm_compute/runtime/NEON/functions/NENormalizationLayer.h6
-rw-r--r--arm_compute/runtime/NEON/functions/NEPermute.h4
-rw-r--r--arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h12
-rw-r--r--arm_compute/runtime/NEON/functions/NEPoolingLayer.h8
-rw-r--r--arm_compute/runtime/NEON/functions/NEReshapeLayer.h2
-rw-r--r--arm_compute/runtime/NEON/functions/NESoftmaxLayer.h8
-rw-r--r--arm_compute/runtime/NEON/functions/NETranspose.h6
-rw-r--r--src/core/CL/CLHelpers.cpp10
-rw-r--r--src/core/CL/CLKernelLibrary.cpp10
-rw-r--r--src/core/CL/cl_kernels/activation_layer.cl22
-rw-r--r--src/core/CL/cl_kernels/arithmetic_op.cl10
-rw-r--r--src/core/CL/cl_kernels/batchnormalization_layer.cl17
-rw-r--r--src/core/CL/cl_kernels/channel_shuffle.cl2
-rw-r--r--src/core/CL/cl_kernels/col2im.cl42
-rw-r--r--src/core/CL/cl_kernels/concatenate.cl4
-rw-r--r--src/core/CL/cl_kernels/convert_fc_weights.cl2
-rw-r--r--src/core/CL/cl_kernels/convolution_layer.cl4
-rw-r--r--src/core/CL/cl_kernels/depth_convert.cl37
-rw-r--r--src/core/CL/cl_kernels/depthwise_convolution.cl4
-rw-r--r--src/core/CL/cl_kernels/dequantization_layer.cl4
-rw-r--r--src/core/CL/cl_kernels/direct_convolution1x1.cl14
-rw-r--r--src/core/CL/cl_kernels/direct_convolution3x3.cl17
-rw-r--r--src/core/CL/cl_kernels/fill_border.cl4
-rw-r--r--src/core/CL/cl_kernels/fixed_point.h518
-rw-r--r--src/core/CL/cl_kernels/gemm.cl705
-rw-r--r--src/core/CL/cl_kernels/im2col.cl36
-rw-r--r--src/core/CL/cl_kernels/l2_normalize.cl6
-rw-r--r--src/core/CL/cl_kernels/normalization_layer.cl26
-rw-r--r--src/core/CL/cl_kernels/permute.cl6
-rw-r--r--src/core/CL/cl_kernels/pixelwise_mul_int.cl21
-rw-r--r--src/core/CL/cl_kernels/pooling_layer.cl48
-rw-r--r--src/core/CL/cl_kernels/reshape_layer.cl4
-rw-r--r--src/core/CL/cl_kernels/softmax_layer.cl28
-rw-r--r--src/core/CL/cl_kernels/softmax_layer_quantized.cl4
-rw-r--r--src/core/CL/kernels/CLActivationLayerKernel.cpp21
-rw-r--r--src/core/CL/kernels/CLArithmeticAdditionKernel.cpp14
-rw-r--r--src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp14
-rw-r--r--src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp8
-rw-r--r--src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLCol2ImKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLDepthConvertLayerKernel.cpp18
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp1
-rw-r--r--src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp1
-rw-r--r--src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp1
-rw-r--r--src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDequantizationLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp13
-rw-r--r--src/core/CL/kernels/CLFillBorderKernel.cpp8
-rw-r--r--src/core/CL/kernels/CLFloorKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp5
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp17
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp16
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp1
-rw-r--r--src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLIm2ColKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLMinMaxLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLNormalizationLayerKernel.cpp19
-rw-r--r--src/core/CL/kernels/CLPermuteKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp31
-rw-r--r--src/core/CL/kernels/CLPoolingLayerKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLQuantizationLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLROIPoolingLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLReductionOperationKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLReshapeLayerKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLSoftmaxLayerKernel.cpp15
-rw-r--r--src/core/CL/kernels/CLTransposeKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLWeightsReshapeKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp3
-rw-r--r--src/core/CPP/kernels/CPPPermuteKernel.cpp5
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp3
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp6
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp3
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp3
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp2
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp1
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp2
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp3
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp10
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp11
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEActivationLayerKernel.cpp235
-rw-r--r--src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp54
-rw-r--r--src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp54
-rw-r--r--src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp118
-rw-r--r--src/core/NEON/kernels/NECol2ImKernel.cpp7
-rw-r--r--src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp17
-rw-r--r--src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp187
-rw-r--r--src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp1
-rw-r--r--src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp1
-rw-r--r--src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEDequantizationLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp310
-rw-r--r--src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp116
-rw-r--r--src/core/NEON/kernels/NEFillBorderKernel.cpp8
-rw-r--r--src/core/NEON/kernels/NEFloorKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp30
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp61
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp654
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp1
-rw-r--r--src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp10
-rw-r--r--src/core/NEON/kernels/NEIm2ColKernel.cpp44
-rw-r--r--src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEMinMaxLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NENormalizationLayerKernel.cpp150
-rw-r--r--src/core/NEON/kernels/NEPermuteKernel.cpp5
-rw-r--r--src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp154
-rw-r--r--src/core/NEON/kernels/NEPoolingLayerKernel.cpp563
-rw-r--r--src/core/NEON/kernels/NEQuantizationLayerKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEReductionOperationKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEReshapeLayerKernel.cpp5
-rw-r--r--src/core/NEON/kernels/NESoftmaxLayerKernel.cpp166
-rw-r--r--src/core/NEON/kernels/NETransposeKernel.cpp3
-rw-r--r--src/core/NEON/kernels/NEWeightsReshapeKernel.cpp4
-rw-r--r--src/core/TensorInfo.cpp56
-rw-r--r--src/core/Utils.cpp8
-rw-r--r--src/runtime/CL/functions/CLDeconvolutionLayer.cpp6
-rw-r--r--src/runtime/CL/functions/CLDepthConcatenateLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLFullyConnectedLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp19
-rw-r--r--src/runtime/CL/functions/CLReductionOperation.cpp3
-rw-r--r--src/runtime/CL/functions/CLWidthConcatenateLayer.cpp4
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp4
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp4
-rw-r--r--src/runtime/NEON/functions/NEDeconvolutionLayer.cpp6
-rw-r--r--src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp2
-rw-r--r--src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp2
-rw-r--r--src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp33
-rw-r--r--src/runtime/NEON/functions/NEFullyConnectedLayer.cpp5
-rw-r--r--src/runtime/NEON/functions/NEGEMM.cpp8
-rw-r--r--src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp21
-rw-r--r--src/runtime/NEON/functions/NENormalizationLayer.cpp4
-rw-r--r--tests/CL/CLAccessor.h6
-rw-r--r--tests/GLES_COMPUTE/GCAccessor.h6
-rw-r--r--tests/IAccessor.h6
-rw-r--r--tests/NEON/Accessor.h6
-rw-r--r--tests/SimpleTensor.h11
-rw-r--r--tests/Types.h12
-rw-r--r--tests/Utils.h1
-rw-r--r--tests/benchmark/NEON/DirectConvolutionLayer.cpp2
-rw-r--r--tests/validation/CL/ActivationLayer.cpp27
-rw-r--r--tests/validation/CL/ArithmeticDivision.cpp6
-rw-r--r--tests/validation/CL/ConvolutionLayer.cpp48
-rw-r--r--tests/validation/CL/DeconvolutionLayer.cpp48
-rw-r--r--tests/validation/CL/DepthwiseConvolutionLayer.cpp160
-rw-r--r--tests/validation/CL/DilatedConvolutionLayer.cpp30
-rw-r--r--tests/validation/CL/DirectConvolutionLayer.cpp99
-rw-r--r--tests/validation/CL/LSTMLayer.cpp128
-rw-r--r--tests/validation/CL/LocallyConnected.cpp64
-rw-r--r--tests/validation/CL/NormalizationLayer.cpp24
-rw-r--r--tests/validation/CL/PoolingLayer.cpp32
-rw-r--r--tests/validation/CL/RNNLayer.cpp84
-rw-r--r--tests/validation/CL/WidthConcatenateLayer.cpp24
-rw-r--r--tests/validation/FixedPoint.h997
-rw-r--r--tests/validation/GLES_COMPUTE/ActivationLayer.cpp27
-rw-r--r--tests/validation/GLES_COMPUTE/PoolingLayer.cpp16
-rw-r--r--tests/validation/Helpers.h6
-rw-r--r--tests/validation/NEON/ConvolutionLayer.cpp24
-rw-r--r--tests/validation/NEON/DeconvolutionLayer.cpp48
-rw-r--r--tests/validation/NEON/DilatedConvolutionLayer.cpp24
-rw-r--r--tests/validation/NEON/DirectConvolutionLayer.cpp72
-rw-r--r--tests/validation/NEON/GEMMLowp.cpp2
-rw-r--r--tests/validation/NEON/LocallyConnected.cpp64
-rw-r--r--tests/validation/NEON/NormalizationLayer.cpp24
-rw-r--r--tests/validation/NEON/PoolingLayer.cpp28
-rw-r--r--tests/validation/NEON/RNNLayer.cpp96
-rw-r--r--tests/validation/NEON/Scale.cpp20
-rw-r--r--tests/validation/Validation.h1
-rw-r--r--tests/validation/fixtures/PoolingLayerFixture.h8
-rw-r--r--tests/validation/fixtures/SoftmaxLayerFixture.h22
-rw-r--r--tests/validation/reference/ArithmeticSubtraction.cpp3
-rw-r--r--tests/validation/reference/BatchNormalizationLayer.cpp1
-rw-r--r--tests/validation/reference/ChannelCombine.cpp1
-rw-r--r--tests/validation/reference/ChannelExtract.cpp1
-rw-r--r--tests/validation/reference/ColorConvert.cpp1
-rw-r--r--tests/validation/reference/Convolution3d.h71
-rw-r--r--tests/validation/reference/ConvolutionLayer.cpp1
-rw-r--r--tests/validation/reference/DeconvolutionLayer.cpp1
-rw-r--r--tests/validation/reference/DepthConcatenateLayer.cpp1
-rw-r--r--tests/validation/reference/DepthConvertLayer.cpp28
-rw-r--r--tests/validation/reference/DepthwiseConvolutionLayer.cpp1
-rw-r--r--tests/validation/reference/FixedPoint.cpp83
-rw-r--r--tests/validation/reference/FixedPoint.h44
-rw-r--r--tests/validation/reference/FlattenLayer.cpp2
-rw-r--r--tests/validation/reference/FullyConnectedLayer.cpp1
-rw-r--r--tests/validation/reference/GEMM.cpp70
-rw-r--r--tests/validation/reference/GEMM.h4
-rw-r--r--tests/validation/reference/GEMMInterleave4x4.h3
-rw-r--r--tests/validation/reference/GEMMInterleaveBlocked.h3
-rw-r--r--tests/validation/reference/GEMMTranspose1xW.h3
-rw-r--r--tests/validation/reference/NormalizationLayer.cpp120
-rw-r--r--tests/validation/reference/NormalizationLayer.h4
-rw-r--r--tests/validation/reference/PixelWiseMultiplication.cpp2
-rw-r--r--tests/validation/reference/PoolingLayer.cpp3
-rw-r--r--tests/validation/reference/SoftmaxLayer.cpp53
-rw-r--r--tests/validation/reference/SoftmaxLayer.h4
-rw-r--r--tests/validation/reference/Transpose.cpp3
-rw-r--r--tests/validation/reference/WidthConcatenateLayer.cpp1
-rw-r--r--utils/TypePrinter.h55
323 files changed, 1353 insertions, 11905 deletions
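As a further hedged sketch (again not part of this commit), the per-kernel validate() helpers documented in the headers below remain the usual way to check that a configuration still passes once QS8/QS16 are gone; the shapes, quantization parameters and convert policy here are arbitrary example values.

    // Sketch only: checking a configuration against the trimmed data-type list.
    #include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    bool addition_config_is_valid()
    {
        const TensorShape shape(16U, 16U);
        const TensorInfo  input1(shape, 1, DataType::QASYMM8, QuantizationInfo(0.25f, 0));
        const TensorInfo  input2(shape, 1, DataType::QASYMM8, QuantizationInfo(0.25f, 0));
        const TensorInfo  output(shape, 1, DataType::QASYMM8, QuantizationInfo(0.25f, 0));

        // QS8/QS16 inputs can no longer reach this point; QASYMM8/U8/S16/F16/F32 remain.
        const Status status = CLArithmeticAdditionKernel::validate(&input1, &input2, &output, ConvertPolicy::SATURATE);
        return status.error_code() == ErrorCode::OK;
    }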
diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
index c6d8f96a87..12d00de7e8 100644
--- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
@@ -51,7 +51,7 @@ public:
* @note If the output tensor is a nullptr, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * of the activation function. Data types supported: QASYMM8/F16/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] act_info Activation layer information.
*/
@@ -59,7 +59,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * of the activation function. Data types supported: QASYMM8/F16/F32.
* @param[in] output Destination tensor info. Data type supported: same as @p input
* @param[in] act_info Activation layer information.
*
diff --git a/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h b/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h
index a33cbf321f..f4275f4153 100644
--- a/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h
+++ b/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h
@@ -53,17 +53,17 @@ public:
~CLArithmeticAdditionKernel() = default;
/** Initialise the kernel's inputs, output and convertion policy.
*
- * @param[in] input1 First tensor input. Data types supported: U8/QS8/QASYMM8/QS16/S16/F16/F32.
- * @param[in] input2 Second tensor input. Data types supported: U8/QS8 (only if @p input1 is QS8), QASYMM8 (only if @p input1 is QASYMM8), QS16 (only if @p input1 is QS16), S16/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8),QASYMM8 (only if @p input1 is QASYMM8), QS16 (only if both inputs are QS16), S16/F16/F32.
+ * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/S16/F16/F32.
+ * @param[in] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
* @param[in] policy Policy to use to handle overflow.
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticAdditionKernel
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QS8/QASYMM8/QS16/S16/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8/QS8 (only if @p input1 is QS8), QASYMM8 (only if @p input1 is QASYMM8), QS16 (only if @p input1 is QS16), S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QASYMM8 (only if both inputs are QASYMM8), QS16 (only if both inputs are QS16), S16/F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+ * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
* @param[in] policy Policy to use to handle overflow.
*
* @return a status
diff --git a/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h b/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h
index c5f862a61f..35b918fe4b 100644
--- a/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h
+++ b/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,17 +55,17 @@ public:
/** Initialise the kernel's inputs, output and convertion policy.
*
- * @param[in] input1 First tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32.
- * @param[in] input2 Second tensor input. Data types supported: U8/QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16/F32.
+ * @param[in] input1 First tensor input. Data types supported: U8/S16/F16/F32.
+ * @param[in] input2 Second tensor input. Data types supported: U8/S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16/F32.
* @param[in] policy Policy to use to handle overflow.
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticSubtractionKernel
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QS8/QS16/S16/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8/QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/S16/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: U8/S16/F16/F32.
+ * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), S16/F16/F32.
* @param[in] policy Policy to use to handle overflow.
*
* @return a status
diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
index 8015f08d1b..9c8d02532a 100644
--- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
@@ -54,7 +54,7 @@ public:
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
* 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+ * The rest are optional and used for representing batches. Data types supported: F16/F32.
* @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
* @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
@@ -69,7 +69,7 @@ public:
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
* 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+ * The rest are optional and used for representing batches. Data types supported: F16/F32.
* @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
* @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
diff --git a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h b/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h
index 684a0e5027..f7bd205ec7 100644
--- a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h
@@ -48,14 +48,14 @@ public:
~CLChannelShuffleLayerKernel() = default;
/** Configure function's inputs and outputs.
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
*/
void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
/** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
*
diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
index 3779325efe..94f21b1ebc 100644
--- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h
+++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
@@ -66,7 +66,7 @@ public:
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to convert. Data types supported: QS8/QS16/QASYMM8/F16/F32
+ * @param[in] input The input tensor to convert. Data types supported: QASYMM8/F16/F32
* @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
* while the rest represent batch of outputs. Data types supported: Same as @p input
* @param[in] convolved_dims Output convolved dimensions.
@@ -74,7 +74,7 @@ public:
void configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims);
/** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel
*
- * @param[in] input The input tensor to convert. Data types supported: QS8/QS16/QASYMM8/F16/F32
+ * @param[in] input The input tensor to convert. Data types supported: QASYMM8/F16/F32
* @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
* while the rest represent batch of outputs. Data types supported: Same as @p input
* @param[in] convolved_dims Output convolved dimensions.
diff --git a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
index fe24aa9d8c..f5e2f0de89 100644
--- a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
+++ b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
@@ -55,7 +55,7 @@ public:
~CLConvertFullyConnectedWeightsKernel() = default;
/** Set the input and output tensor.
*
- * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/QS32/F16/F32.
+ * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/QS32/F16/F32.
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). Must be in NCHW format.
* @param[in] data_layout The data layout the weights have been trained in.
@@ -63,7 +63,7 @@ public:
void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
/** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel
*
- * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/QS32/F16/F32.
+ * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/QS32/F16/F32.
* @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). Must be in NCHW format.
* @param[in] data_layout The data layout the weights have been trained in.
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
index 467bdfab3b..cbcab8f554 100644
--- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,7 +52,7 @@ public:
~CLDepthConcatenateLayerKernel() = default;
/** Initialise the kernel's inputs and output
*
- * @param[in] input Input tensor. Data types supported: QS8/QS16/F16/F32.
+ * @param[in] input Input tensor. Data types supported: F16/F32.
* @param[in] depth_offset The offset on the Z axis.
* @param[in,out] output Output tensor. Data types supported: Same as @p input.
*
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
index 3a6310d69e..7e795c607a 100644
--- a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,17 +43,14 @@ public:
*
* Valid conversions Input -> Output :
*
- * - QS8 -> F32
- * - QS16 -> F32
* - U8 -> U16, S16, U32, S32
* - U16 -> U8, U32, S32
* - S16 -> U8, U32, S32
* - U32 -> U8, U16, S16
* - S32 -> U8, U16, S16
- * - F32 -> QS8, QS16
*
- * @param[in] input The input tensor to convert. Data types supported: U8/QS8/U16/S16/QS16/U32/S32/F32.
- * @param[out] output The output tensor. Data types supported: U8/QS8/U16/S16/QS16/U32/S32/F32.
+ * @param[in] input The input tensor to convert. Data types supported: U8/U16/S16/U32/S32/F32.
+ * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F32.
* @param[in] policy Conversion policy
* @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
*/
diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
index eb1bf58b1b..bd37e35334 100644
--- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
@@ -56,7 +56,7 @@ public:
* 5x5 convolution with stride_x = 1/2, stride_y = 1/2
*
* @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QS8/QS16/F16/F32.
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
* The 3rd dimension must be the same as the input's volume 3rd dimension.
* Data type supported:Same as @p input.
@@ -70,7 +70,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerKernel
*
* @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
* The 3rd dimension must be the same as the input's volume 3rd dimension.
* Data type supported:Same as @p input.
diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h
index 9340e9a8d8..1947a98ba3 100644
--- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h
+++ b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h
@@ -51,11 +51,11 @@ public:
/** Set the accumulate buffer and the biases of the kernel.
*
* @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: S32/QS16/QS32/F16/F32
+ * Data type supported: S32/QS32/F16/F32
* @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
* @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
* Required parameter if output is of QASYMM8 type.
- * Data types supported: QS8/QASYMM8/QS16/F16/F32
+ * Data types supported: QASYMM8/F16/F32
* @param[in] result_fixedpoint_multiplier (Optional)Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
* @param[in] result_shift (Optional)Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
* @param[in] result_offset_after_shift (Optional)Offset to be applied to result before converting it back to QASYMM8
@@ -65,10 +65,10 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerOutputStageKernel
*
* @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: QS16/QS32/F16/F32
+ * Data type supported: QS32/F16/F32
* @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
* @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Data type supported: QS8/QS16/F16/F32
+ * Data type supported: F16/F32
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr);
diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
index 18031c7e7e..20e872eccb 100644
--- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h
+++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
@@ -51,7 +51,7 @@ public:
/** Initialise the kernel's input, output and border mode.
*
- * @param[in,out] tensor Tensor to process Data types supported: U8/QS8/S16/QS16/S32/F16/F32.
+ * @param[in,out] tensor Tensor to process Data types supported: U8/S16/S32/F16/F32.
* @param[in] border_size Size of the border to fill in elements.
* @param[in] border_mode Border mode to use for the convolution.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
diff --git a/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h
index c0fef45afe..7f8e766f1a 100644
--- a/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h
@@ -64,14 +64,14 @@ public:
CLGEMMInterleave4x4Kernel &operator=(CLGEMMInterleave4x4Kernel &&) = default;
/** Initialise the kernel's input and output.
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input
* @param[in] mult_interleave4x4_height (Optional) Multiplication factor for the height of the 4x4 interleave block
*/
void configure(const ICLTensor *input, ICLTensor *output, int mult_interleave4x4_height = 1);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMInterleave4x4Kernel
*
- * @param[in] input Input tensor info. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
* @param[in] mult_interleave4x4_height Multiplication factor for the height of the 4x4 interleave block
*
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
index 2956f93cdc..f201af0d5e 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,13 +46,13 @@ public:
CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
/** Set the accumulate buffer and the biases of the kernel.
*
- * @param[in, out] accum The accumulate tensor to convert. Data types supported: QS8/QS16/F16/F32
+ * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32
* @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
*/
void configure(ICLTensor *accum, const ICLTensor *biases);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixAccumulateBiasesKernel
*
- * @param[in] accum The accumulate tensor to convert. Data types supported: QS8/QS16/F16/F32
+ * @param[in] accum The accumulate tensor to convert. Data types supported: F16/F32
* @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
* @param[in] gpu_target GPU target
*
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h
index 3755d943c5..bf8e1d4b17 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h
@@ -52,14 +52,14 @@ public:
*
* @note The input and output tensors must have the same dimensions
*
- * @param[in] input Input tensor (Matrix C). Data types supported: QS8/QS16/F16/F32
+ * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32
* @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref CLGEMMMatrixMultiplyKernel. Data type supported: same as @p input
* @param[in] beta Weight of matrix C
*/
void configure(const ICLTensor *input, ICLTensor *output, float beta);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixAdditionKernel.
*
- * @param[in] input Input tensor (Matrix C). Data types supported: QS8/QS16/F16/F32
+ * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32
* @param[in] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref CLGEMMMatrixMultiplyKernel. Data type supported: same as @p input
* @param[in] beta Weight of matrix C
*
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
index 15bba0cd0f..1b6a0c87a9 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
@@ -53,7 +53,7 @@ public:
CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default;
/** Initialise the kernel's input, output and alpha
*
- * @param[in] input0 Input tensor containing the Matrix A. Data types supported: QS8/QS16/F16/F32
+ * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32
* @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0
* @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
* @param[in] alpha Weight of the matrix product
@@ -64,7 +64,7 @@ public:
void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha, bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel
*
- * @param[in] input0 Input tensor containing the Matrix A. Data types supported: QS8/QS16/F16/F32
+ * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32
* @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0
* @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
* @param[in] alpha Weight of the matrix product
diff --git a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
index 9a3069eab6..47a4ad515b 100644
--- a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
@@ -70,14 +70,14 @@ class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel
public:
/** Initialise the kernel's input and output.
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input
* @param[in] mult_transpose1xW_width (Optional) Multiplication factor for the width of the 1xW transposed block
*/
void configure(const ICLTensor *input, ICLTensor *output, int mult_transpose1xW_width = 1);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMTranspose1xWKernel
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output Output tensor. Data type supported: same as @p input.
* @param[in] mult_transpose1xW_width Multiplication factor for the width of the 1xW transposed block
*
diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
index 7e119a32a8..fc930abcbe 100644
--- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h
+++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
@@ -69,7 +69,7 @@ public:
/** Set the input and output of the kernel.
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QASYMM8/QS16/F16/F32
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
* @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
* while every dimension above represents a batch. Data types supported: Same as @p input
* @param[in] kernel_dims The kernel dimensions (width and height).
@@ -81,7 +81,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QASYMM8/QS16/F16/F32
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
* @param[in] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
* while every dimension above represents a batch. Data types supported: Same as @p input
* @param[in] kernel_dims The kernel dimensions (width and height).
@@ -113,7 +113,7 @@ private:
/** Chooses and configure the right kernel for the given input arguments.
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QASYMM8/QS16/F16/F32
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
* @param[in] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
* while every dimension above represents a batch. Data types supported: Same as @p input
* @param[in] kernel_dims The kernel dimensions (width and height).
diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
index ef00e59e5c..f2d37a781c 100644
--- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
@@ -48,7 +48,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/F16/F32.
+ * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32.
* @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
* @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
*/
@@ -56,7 +56,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/F16/F32.
+ * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32.
* @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
* @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
*
diff --git a/arm_compute/core/CL/kernels/CLPermuteKernel.h b/arm_compute/core/CL/kernels/CLPermuteKernel.h
index b01df64ebd..21da141c0d 100644
--- a/arm_compute/core/CL/kernels/CLPermuteKernel.h
+++ b/arm_compute/core/CL/kernels/CLPermuteKernel.h
@@ -49,14 +49,14 @@ public:
CLPermuteKernel &operator=(CLPermuteKernel &&) = default;
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to permute. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output The output tensor. Data types supported: Same as @p input
* @param[in] perm Permutation vector
*/
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
/** Static function to check if given info will lead to a valid configuration of @ref CLPermuteKernel
*
- * @param[in] input First tensor input info. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32.
+ * @param[in] input First tensor input info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
* @param[in] output Output tensor info. Data types supported: same as @p input.
* @param[in] perm Permutation vector
*
diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
index fcabb614df..b835aa701b 100644
--- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
@@ -49,11 +49,11 @@ public:
CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default;
/** Initialise the kernel's input, output and border mode.
*
- * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32.
* @param[in] input2 An input tensor. Data types supported: same as @p input1.
- * @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16).
+ * @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
* @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. For QS8 and QS16 scale must be 1.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
* @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
*/
@@ -61,11 +61,11 @@ public:
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
/** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel
*
- * @param[in] input1 An input tensor info. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32.
* @param[in] input2 An input tensor info. Data types supported: same as @p input1.
- * @param[in] output The output tensor info, Data types supported: same as @p input1. Note: U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16).
+ * @param[in] output The output tensor info, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
* @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. For QS8 and QS16 scale must be 1.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
* @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
*
diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
index c13507785b..db1a756229 100644
--- a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
@@ -51,16 +51,15 @@ public:
/** Set the input and output tensors.
*
- * @note QS8 and QS16 are supported only for pool sizes 3, 5 and 7
*
- * @param[in] input Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*/
void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel
*
- * @param[in] input Source tensor info. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
* @param[in] output Destination tensor info. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*
diff --git a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h
index 044b5e7006..b253d66f4f 100644
--- a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,7 @@ public:
~CLReshapeLayerKernel() = default;
/** Set the input and output of the kernel
*
- * @param[in] input Source tensor. Data type supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/F16/F32
+ * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
* @param[out] output Destination tensor. Data type supported: Same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
index c562565175..b272878fe7 100644
--- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
@@ -38,13 +38,13 @@ class CLLogits1DMaxKernel : public ICLSimple3DKernel
public:
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
* @param[out] output Destination tensor. Data types supported: same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxKernel
*
- * @param[in] input Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
* @param[in] output Destination tensor. Data types supported: same as @p input
*
* @return a status
@@ -68,7 +68,7 @@ public:
CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default;
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
* @param[in] max Max values tensor. Data types supported: same as @p input
* @param[out] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
* @param[out] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
@@ -77,7 +77,7 @@ public:
void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
/** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DShiftExpSumKernel
*
- * @param[in] input Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
* @param[in] max Max values tensor. Data types supported: same as @p input
* @param[in] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
* @param[in] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
@@ -116,7 +116,7 @@ public:
CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default;
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
+ * @param[in] input Source tensor. Data types supported: F16/F32
* @param[in,out] max Max values tensor. Data types supported: same as @p input
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
@@ -125,7 +125,7 @@ public:
void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
/** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel
*
- * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
+ * @param[in] input Source tensor. Data types supported: F16/F32
* @param[in] max Max values tensor. Data types supported: same as @p input
* @param[in] output Destination tensor. Data types supported: same as @p input
* @param[in] sum Sum of 1D logits tensor. Data types supported: same as @p input
@@ -175,7 +175,7 @@ public:
CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default;
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QS8/QS16/S32/F16/F32
+ * @param[in] input Source tensor. Data types supported: S32/F16/F32
* @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
* @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent. (Default = 1.0)
@@ -183,7 +183,7 @@ public:
void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, float beta = 1.0f);
/** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel
*
- * @param[in] input Source tensor. Data types supported: QS8/QS16/S32/F16/F32
+ * @param[in] input Source tensor. Data types supported: S32/F16/F32
* @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
* @param[in] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
*
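
The kernels above split the numerically stable softmax into stages: find the per-row maximum, compute exp(beta * (x - max)) together with its sum, then normalise by that sum. A scalar reference sketch of the same computation follows (the kernels themselves are vectorised and operate on CL tensors; the function name is illustrative):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Reference-only sketch of the staged softmax implemented by the CL kernels:
    // 1) row maximum, 2) exp(beta * (x - max)) and its sum, 3) normalisation.
    std::vector<float> softmax_1d(const std::vector<float> &x, float beta = 1.0f)
    {
        const float max_val = *std::max_element(x.begin(), x.end());

        std::vector<float> y(x.size());
        float              sum = 0.f;
        for(std::size_t i = 0; i < x.size(); ++i)
        {
            y[i] = std::exp(beta * (x[i] - max_val));
            sum += y[i];
        }
        for(float &v : y)
        {
            v /= sum;
        }
        return y;
    }
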
diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/arm_compute/core/CL/kernels/CLTransposeKernel.h
index 2e1b481d3f..09d7a8a430 100644
--- a/arm_compute/core/CL/kernels/CLTransposeKernel.h
+++ b/arm_compute/core/CL/kernels/CLTransposeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,13 +40,13 @@ class CLTransposeKernel : public ICLSimple2DKernel
public:
/** Initialise the kernel's input and output.
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLTransposeKernel
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output Output tensor. Data type supported: Same as @p input
*
* @return a status
diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
index 7a54284199..664fc3c216 100644
--- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
+++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
@@ -69,7 +69,7 @@ public:
/** Set the input and output of the kernel.
*
* @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QS8/QS16/QASYMM8/F16/F32
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QASYMM8/F16/F32
* @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
* dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
* @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
@@ -79,7 +79,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel
*
* @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QS8/QS16/QASYMM8/F16/F32
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QASYMM8/F16/F32
* @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
* dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
* @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
index 5b8a318320..d206eb0da7 100644
--- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
@@ -52,7 +52,7 @@ public:
~CLWidthConcatenateLayerKernel() = default;
/** Initialise the kernel's inputs and output
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] width_offset The offset on the X axis.
* @param[in,out] output Output tensor. Data types supported: Same as @p input.
*
@@ -60,7 +60,7 @@ public:
void configure(const ICLTensor *input, unsigned int width_offset, ICLTensor *output);
 /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel
*
- * @param[in] input Input tensor info. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] width_offset The offset on the X axis.
* @param[in] output Output tensor info. Data types supported: Same as @p input.
*
diff --git a/arm_compute/core/CPP/kernels/CPPPermuteKernel.h b/arm_compute/core/CPP/kernels/CPPPermuteKernel.h
index 3d6b43641e..5e9ae43ee0 100644
--- a/arm_compute/core/CPP/kernels/CPPPermuteKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPPermuteKernel.h
@@ -56,14 +56,14 @@ public:
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to permute. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output The output tensor. Data types supported: Same as @p input
* @param[in] perm Permutation vector
*/
void configure(const ITensor *input, ITensor *output, const PermutationVector &perm);
/** Static function to check if given info will lead to a valid configuration of @ref CPPPermuteKernel
*
- * @param[in] input The input tensor to permute. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output The output tensor. Data types supported: Same as @p input
* @param[in] perm Permutation vector
*
diff --git a/arm_compute/core/FixedPoint.h b/arm_compute/core/FixedPoint.h
deleted file mode 100644
index 6e00500b10..0000000000
--- a/arm_compute/core/FixedPoint.h
+++ /dev/null
@@ -1,373 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_FIXEDPOINT_H__
-#define __ARM_COMPUTE_FIXEDPOINT_H__
-
-#include <cstdint>
-
-namespace arm_compute
-{
-using qint8_t = int8_t; /**< 8 bit fixed point scalar value */
-using qint16_t = int16_t; /**< 16 bit fixed point scalar value */
-using qint32_t = int32_t; /**< 32 bit fixed point scalar value */
-using qint64_t = int64_t; /**< 64 bit fixed point scalar value */
-
-/** 8 bit fixed point scalar saturating shift left
- *
- * @param[in] a First 8 bit fixed point input
- * @param[in] shift Shift amount (positive only values)
- *
- * @return The result of the 8 bit fixed point shift. The result is saturated in case of overflow
- */
-qint8_t sqshl_qs8(qint8_t a, int shift);
-
-/** 8 bit fixed point scalar shift right
- *
- * @param[in] a First 8 bit fixed point input
- * @param[in] shift Shift amount (positive only values)
- *
- * @return The result of the 8 bit fixed point shift
- */
-qint8_t sshr_qs8(qint8_t a, int shift);
-
-/** 16 bit fixed point scalar shift right
- *
- * @param[in] a First 16 bit fixed point input
- * @param[in] shift Shift amount (positive only values)
- *
- * @return The result of the 16 bit fixed point shift
- */
-qint16_t sshr_qs16(qint16_t a, int shift);
-
-/** 16 bit fixed point scalar saturating shift left
- *
- * @param[in] a First 16 bit fixed point input
- * @param[in] shift Shift amount (positive only values)
- *
- * @return The result of the 16 bit fixed point shift. The result is saturated in case of overflow
- */
-qint16_t sqshl_qs16(qint16_t a, int shift);
-
-/** 8 bit fixed point scalar absolute value
- *
- * @param[in] a 8 bit fixed point input
- *
- * @return The result of the 8 bit fixed point absolute value
- */
-qint8_t sabs_qs8(qint8_t a);
-
-/** 16 bit fixed point scalar absolute value
- *
- * @param[in] a 16 bit fixed point input
- *
- * @return The result of the 16 bit fixed point absolute value
- */
-qint16_t sabs_qs16(qint16_t a);
-
-/** 8 bit fixed point scalar add
- *
- * @param[in] a First 8 bit fixed point input
- * @param[in] b Second 8 bit fixed point input
- *
- * @return The result of the 8 bit fixed point addition
- */
-qint8_t sadd_qs8(qint8_t a, qint8_t b);
-
-/** 16 bit fixed point scalar add
- *
- * @param[in] a First 16 bit fixed point input
- * @param[in] b Second 16 bit fixed point input
- *
- * @return The result of the 16 bit fixed point addition
- */
-qint16_t sadd_qs16(qint16_t a, qint16_t b);
-
-/** 8 bit fixed point scalar saturating add
- *
- * @param[in] a First 8 bit fixed point input
- * @param[in] b Second 8 bit fixed point input
- *
- * @return The result of the 8 bit fixed point addition. The result is saturated in case of overflow
- */
-qint8_t sqadd_qs8(qint8_t a, qint8_t b);
-
-/** 16 bit fixed point scalar saturating add
- *
- * @param[in] a First 16 bit fixed point input
- * @param[in] b Second 16 bit fixed point input
- *
- * @return The result of the 16 bit fixed point addition. The result is saturated in case of overflow
- */
-qint16_t sqadd_qs16(qint16_t a, qint16_t b);
-
-/** 32 bit fixed point scalar saturating add
- *
- * @param[in] a First 32 bit fixed point input
- * @param[in] b Second 32 bit fixed point input
- *
- * @return The result of the 32 bit fixed point addition. The result is saturated in case of overflow
- */
-qint32_t sqadd_qs32(qint32_t a, qint32_t b);
-
-/** 8 bit fixed point scalar subtraction
- *
- * @param[in] a First 8 bit fixed point input
- * @param[in] b Second 8 bit fixed point input
- *
- * @return The result of the 8 bit fixed point subtraction
- */
-qint8_t ssub_qs8(qint8_t a, qint8_t b);
-
-/** 16 bit fixed point scalar subtraction
- *
- * @param[in] a First 16 bit fixed point input
- * @param[in] b Second 16 bit fixed point input
- *
- * @return The result of the 16 bit fixed point subtraction
- */
-qint16_t ssub_qs16(qint16_t a, qint16_t b);
-
-/** 8 bit fixed point scalar saturating subtraction
- *
- * @param[in] a First 8 bit fixed point input
- * @param[in] b Second 8 bit fixed point input
- *
- * @return The result of the 8 bit fixed point subtraction. The result is saturated in case of overflow
- */
-qint8_t sqsub_qs8(qint8_t a, qint8_t b);
-
-/** 16 bit fixed point scalar saturating subtraction
- *
- * @param[in] a First 16 bit fixed point input
- * @param[in] b Second 16 bit fixed point input
- *
- * @return The result of the 16 bit fixed point subtraction. The result is saturated in case of overflow
- */
-qint16_t sqsub_qs16(qint16_t a, qint16_t b);
-
-/** 8 bit fixed point scalar multiply
- *
- * @param[in] a First 8 bit fixed point input
- * @param[in] b Second 8 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point multiplication.
- */
-qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position);
-
-/** 16 bit fixed point scalar multiply
- *
- * @param[in] a First 16 bit fixed point input
- * @param[in] b Second 16 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point multiplication.
- */
-qint16_t smul_qs16(qint16_t a, qint16_t b, int fixed_point_position);
-
-/** 8 bit fixed point scalar saturating multiply
- *
- * @param[in] a First 8 bit fixed point input
- * @param[in] b Second 8 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point multiplication. The result is saturated in case of overflow
- */
-qint8_t sqmul_qs8(qint8_t a, qint8_t b, int fixed_point_position);
-
-/** 16 bit fixed point scalar saturating multiply
- *
- * @param[in] a First 16 bit fixed point input
- * @param[in] b Second 16 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point multiplication. The result is saturated in case of overflow
- */
-qint16_t sqmul_qs16(qint16_t a, qint16_t b, int fixed_point_position);
-
-/** 8 bit fixed point scalar multiply long
- *
- * @param[in] a First 8 bit fixed point input
- * @param[in] b Second 8 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point multiplication long. The result is saturated in case of overflow
- */
-qint16_t sqmull_qs8(qint8_t a, qint8_t b, int fixed_point_position);
-
-/** 16 bit fixed point scalar multiply long
- *
- * @param[in] a First 16 bit fixed point input
- * @param[in] b Second 16 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point multiplication long. The result is saturated in case of overflow
- */
-qint32_t sqmull_qs16(qint16_t a, qint16_t b, int fixed_point_position);
-
-/** 16 bit fixed point scalar saturating multiply
- *
- * @param[in] a First 16 bit fixed point input
- * @param[in] b Second 16 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point multiplication. The result is saturated in case of overflow
- */
-qint16_t sqmul_qs16(qint16_t a, qint16_t b, int fixed_point_position);
-
-/** 8 bit fixed point scalar inverse square root
- *
- * @param[in] a 8 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point inverse square root.
- */
-qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position);
-
-/** 16 bit fixed point scalar inverse square root
- *
- * @param[in] a 16 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point inverse square root.
- */
-qint16_t sinvsqrt_qs16(qint16_t a, int fixed_point_position);
-
-/** 8 bit fixed point scalar division
- *
- * @param[in] a First 8 bit fixed point input
- * @param[in] b Second 8 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point division.
- */
-qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position);
-
-/** 16 bit fixed point scalar division
- *
- * @param[in] a First 16 bit fixed point input
- * @param[in] b Second 16 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point division.
- */
-qint16_t sdiv_qs16(qint16_t a, qint16_t b, int fixed_point_position);
-
-/** 8 bit fixed point scalar exponential
- *
- * @param[in] a 8 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point exponential.
- */
-qint8_t sqexp_qs8(qint8_t a, int fixed_point_position);
-
-/** 16 bit fixed point scalar exponential
- *
- * @param[in] a 16 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point exponential.
- */
-qint16_t sqexp_qs16(qint16_t a, int fixed_point_position);
-
-/** 16 bit fixed point scalar exponential
- *
- * @param[in] a 16 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point exponential.
- */
-qint16_t sexp_qs16(qint16_t a, int fixed_point_position);
-
-/** 8 bit fixed point scalar logarithm
- *
- * @param[in] a 8 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point logarithm.
- */
-qint8_t slog_qs8(qint8_t a, int fixed_point_position);
-
-/** 16 bit fixed point scalar logarithm
- *
- * @param[in] a 16 bit fixed point input
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point logarithm.
- */
-qint16_t slog_qs16(qint16_t a, int fixed_point_position);
-
-/** Convert an 8 bit fixed point to float
- *
- * @param[in] a Input to convert
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the conversion 8 bit fixed point -> float
- */
-float scvt_f32_qs8(qint8_t a, int fixed_point_position);
-
-/** Convert a float to 8 bit fixed point
- *
- * @param[in] a Input to convert
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the conversion float -> 8 bit fixed point
- */
-qint8_t sqcvt_qs8_f32(float a, int fixed_point_position);
-
-/** Convert a 16 bit fixed point to float
- *
- * @param[in] a Input to convert
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the conversion 16 bit fixed point -> float
- */
-float scvt_f32_qs16(qint16_t a, int fixed_point_position);
-
-/** Convert a float to 16 bit fixed point
- *
- * @param[in] a Input to convert
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the conversion float -> 16 bit fixed point
- */
-qint16_t sqcvt_qs16_f32(float a, int fixed_point_position);
-
-/** Scalar saturating move and narrow.
- *
- * @param[in] a Input to convert to 8 bit fixed point
- *
- * @return The narrowing conversion to 8 bit
- */
-qint8_t sqmovn_qs16(qint16_t a);
-
-/** Scalar saturating move and narrow.
- *
- * @param[in] a Input to convert to 16 bit fixed point
- *
- * @return The narrowing conversion to 16 bit
- */
-qint16_t sqmovn_qs32(qint32_t a);
-}
-#include "arm_compute/core/FixedPoint.inl"
-#endif /* __ARM_COMPUTE_FIXEDPOINT_H__ */
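
The header above defines the QS8/QS16 scalar types being removed: a qint8_t or qint16_t raw value with fixed_point_position fractional bits represents raw / 2^fixed_point_position. A simplified conversion sketch, without the saturation the removed sqcvt_* functions perform (see the .inl below); the function names are illustrative:

    #include <cmath>
    #include <cstdint>

    // Simplified sketch of the removed QS8 representation: `raw` with
    // `fixed_point_position` fractional bits encodes raw / 2^fixed_point_position.
    float qs8_to_float(int8_t raw, int fixed_point_position)
    {
        return static_cast<float>(raw) / static_cast<float>(1 << fixed_point_position);
    }

    // Round-to-nearest conversion; unlike the removed sqcvt_qs8_f32 it does not
    // saturate, so out-of-range values overflow.
    int8_t float_to_qs8(float value, int fixed_point_position)
    {
        return static_cast<int8_t>(std::lround(value * (1 << fixed_point_position)));
    }
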
diff --git a/arm_compute/core/FixedPoint.inl b/arm_compute/core/FixedPoint.inl
deleted file mode 100644
index eb3516e8db..0000000000
--- a/arm_compute/core/FixedPoint.inl
+++ /dev/null
@@ -1,406 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/utils/misc/Utility.h"
-
-#include <cmath>
-#include <limits>
-
-namespace arm_compute
-{
-inline qint8_t sqshl_qs8(qint8_t a, int shift)
-{
- qint16_t tmp = static_cast<qint16_t>(a) << shift;
-
- // Saturate the result in case of overflow and cast to qint8_t
- return utility::saturate_cast<qint8_t>(tmp);
-}
-
-inline qint16_t sqshl_qs16(qint16_t a, int shift)
-{
- qint32_t tmp = static_cast<qint32_t>(a) << shift;
-
- // Saturate the result in case of overflow and cast to qint16_t
- return utility::saturate_cast<qint16_t>(tmp);
-}
-
-inline qint8_t sshr_qs8(qint8_t a, int shift)
-{
- ARM_COMPUTE_ERROR_ON_MSG(shift == 0, "Shift should not be zero");
- const qint8_t round_val = 1 << (shift - 1);
- return sqadd_qs8(a, round_val) >> shift;
-}
-
-inline qint16_t sshr_qs16(qint16_t a, int shift)
-{
- ARM_COMPUTE_ERROR_ON_MSG(shift == 0, "Shift should not be zero");
- const qint16_t round_val = 1 << (shift - 1);
- return sqadd_qs16(a, round_val) >> shift;
-}
-
-inline qint8_t sabs_qs8(qint8_t a)
-{
- return (a < 0) ? (a == std::numeric_limits<int8_t>::min()) ? std::numeric_limits<int8_t>::max() : -a : a;
-}
-
-inline qint16_t sabs_qs16(qint16_t a)
-{
- return (a < 0) ? (a == std::numeric_limits<int16_t>::min()) ? std::numeric_limits<int16_t>::max() : -a : a;
-}
-
-inline qint8_t sadd_qs8(qint8_t a, qint8_t b)
-{
- return a + b;
-}
-
-inline qint16_t sadd_qs16(qint16_t a, qint16_t b)
-{
- return a + b;
-}
-
-inline qint8_t sqadd_qs8(qint8_t a, qint8_t b)
-{
- // We need to store the temporary result in qint16_t otherwise we cannot evaluate the overflow
- qint16_t tmp = (static_cast<qint16_t>(a) + static_cast<qint16_t>(b));
-
- // Saturate the result in case of overflow and cast to qint8_t
- return utility::saturate_cast<qint8_t>(tmp);
-}
-
-inline qint16_t sqadd_qs16(qint16_t a, qint16_t b)
-{
- // We need to store the temporary result in qint32_t otherwise we cannot evaluate the overflow
- qint32_t tmp = (static_cast<qint32_t>(a) + static_cast<qint32_t>(b));
-
- // Saturate the result in case of overflow and cast to qint16_t
- return utility::saturate_cast<qint16_t>(tmp);
-}
-
-inline qint32_t sqadd_qs32(qint32_t a, qint32_t b)
-{
- // We need to store the temporary result in qint64_t otherwise we cannot evaluate the overflow
- qint64_t tmp = (static_cast<qint64_t>(a) + static_cast<qint64_t>(b));
-
- // Saturate the result in case of overflow and cast to qint32_t
- return utility::saturate_cast<qint32_t>(tmp);
-}
-
-inline qint8_t ssub_qs8(qint8_t a, qint8_t b)
-{
- return a - b;
-}
-
-inline qint16_t ssub_qs16(qint16_t a, qint16_t b)
-{
- return a - b;
-}
-
-inline qint8_t sqsub_qs8(qint8_t a, qint8_t b)
-{
- // We need to store the temporary result in qint16_t otherwise we cannot evaluate the overflow
- qint16_t tmp = static_cast<qint16_t>(a) - static_cast<qint16_t>(b);
-
- // Saturate the result in case of overflow and cast to qint8_t
- return utility::saturate_cast<qint8_t>(tmp);
-}
-
-inline qint16_t sqsub_qs16(qint16_t a, qint16_t b)
-{
- // We need to store the temporary result in qint32_t otherwise we cannot evaluate the overflow
- qint32_t tmp = static_cast<qint32_t>(a) - static_cast<qint32_t>(b);
-
- // Saturate the result in case of overflow and cast to qint16_t
- return utility::saturate_cast<qint16_t>(tmp);
-}
-
-inline qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position)
-{
- const qint16_t round_up_const = (1 << (fixed_point_position - 1));
-
- qint16_t tmp = static_cast<qint16_t>(a) * static_cast<qint16_t>(b);
-
- // Rounding up
- tmp += round_up_const;
-
- return static_cast<qint8_t>(tmp >> fixed_point_position);
-}
-
-inline qint16_t smul_qs16(qint16_t a, qint16_t b, int fixed_point_position)
-{
- const qint32_t round_up_const = (1 << (fixed_point_position - 1));
-
- qint32_t tmp = static_cast<qint32_t>(a) * static_cast<qint32_t>(b);
-
- // Rounding up
- tmp += round_up_const;
-
- return static_cast<qint16_t>(tmp >> fixed_point_position);
-}
-
-inline qint8_t sqmul_qs8(qint8_t a, qint8_t b, int fixed_point_position)
-{
- const qint16_t round_up_const = (1 << (fixed_point_position - 1));
-
- qint16_t tmp = static_cast<qint16_t>(a) * static_cast<qint16_t>(b);
-
- // Rounding up
- tmp += round_up_const;
-
- return utility::saturate_cast<qint8_t>(tmp >> fixed_point_position);
-}
-
-inline qint16_t sqmul_qs16(qint16_t a, qint16_t b, int fixed_point_position)
-{
- const qint32_t round_up_const = (1 << (fixed_point_position - 1));
-
- qint32_t tmp = static_cast<qint32_t>(a) * static_cast<qint32_t>(b);
-
- // Rounding up
- tmp += round_up_const;
-
- return utility::saturate_cast<qint16_t>(tmp >> fixed_point_position);
-}
-
-inline qint16_t sqmull_qs8(qint8_t a, qint8_t b, int fixed_point_position)
-{
- const qint16_t round_up_const = (1 << (fixed_point_position - 1));
-
- qint16_t tmp = static_cast<qint16_t>(a) * static_cast<qint16_t>(b);
-
- // Rounding up
- tmp += round_up_const;
-
- return tmp >> fixed_point_position;
-}
-
-inline qint32_t sqmull_qs16(qint16_t a, qint16_t b, int fixed_point_position)
-{
- const qint32_t round_up_const = (1 << (fixed_point_position - 1));
-
- qint32_t tmp = static_cast<qint32_t>(a) * static_cast<qint32_t>(b);
-
- // Rounding up
- tmp += round_up_const;
-
- return tmp >> fixed_point_position;
-}
-
-inline qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position)
-{
- const qint8_t shift = 8 - (fixed_point_position + (__builtin_clz(a) - 24));
-
- const qint8_t const_three = (3 << fixed_point_position);
- qint8_t temp = shift < 0 ? (a << -shift) : (a >> shift);
- qint8_t x2 = temp;
-
- // We need three iterations to find the result
- for(int i = 0; i < 3; ++i)
- {
- qint8_t three_minus_dx = ssub_qs8(const_three, smul_qs8(temp, smul_qs8(x2, x2, fixed_point_position), fixed_point_position));
- x2 = (smul_qs8(x2, three_minus_dx, fixed_point_position) >> 1);
- }
-
- temp = shift < 0 ? (x2 << (-shift >> 1)) : (x2 >> (shift >> 1));
-
- return temp;
-}
-
-inline qint16_t sinvsqrt_qs16(qint16_t a, int fixed_point_position)
-{
- const qint16_t shift = 16 - (fixed_point_position + (__builtin_clz(a) - 16));
-
- const qint16_t const_three = (3 << fixed_point_position);
- qint16_t temp = shift < 0 ? (a << -shift) : (a >> shift);
- qint16_t x2 = temp;
-
- // We need three iterations to find the result
- for(int i = 0; i < 3; ++i)
- {
- qint16_t three_minus_dx = ssub_qs16(const_three, smul_qs16(temp, smul_qs16(x2, x2, fixed_point_position), fixed_point_position));
- x2 = smul_qs16(x2, three_minus_dx, fixed_point_position) >> 1;
- }
-
- temp = shift < 0 ? (x2 << ((-shift) >> 1)) : (x2 >> (shift >> 1));
-
- return temp;
-}
-
-inline qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position)
-{
- const qint16_t temp = a << fixed_point_position;
- return static_cast<qint8_t>(temp / b);
-}
-
-inline qint16_t sdiv_qs16(qint16_t a, qint16_t b, int fixed_point_position)
-{
- const qint32_t temp = a << fixed_point_position;
- return static_cast<qint16_t>(temp / b);
-}
-
-inline qint8_t sqexp_qs8(qint8_t a, int fixed_point_position)
-{
- // Constants
- const qint8_t const_one = (1 << fixed_point_position);
- const qint8_t ln2 = ((0x58 >> (6 - fixed_point_position)) + 1) >> 1;
- const qint8_t inv_ln2 = (((0x38 >> (6 - fixed_point_position)) + 1) >> 1) | const_one;
- const qint8_t A = ((0x7F >> (6 - fixed_point_position)) + 1) >> 1;
- const qint8_t B = ((0x3F >> (6 - fixed_point_position)) + 1) >> 1;
- const qint8_t C = ((0x16 >> (6 - fixed_point_position)) + 1) >> 1;
- const qint8_t D = ((0x05 >> (6 - fixed_point_position)) + 1) >> 1;
-
- // Polynomial expansion
- const int dec_a = (sqmul_qs8(a, inv_ln2, fixed_point_position) >> fixed_point_position);
- const qint8_t alpha = sabs_qs8(sqsub_qs8(a, sqmul_qs8(ln2, sqshl_qs8(dec_a, fixed_point_position), fixed_point_position)));
- qint8_t sum = sqadd_qs8(sqmul_qs8(alpha, D, fixed_point_position), C);
- sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), B);
- sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), A);
- sum = sqmul_qs8(alpha, sum, fixed_point_position);
- sum = sqadd_qs8(sum, const_one);
-
- return (dec_a < 0) ? (sum >> -dec_a) : sqshl_qs8(sum, dec_a);
-}
-
-inline qint16_t sqexp_qs16(qint16_t a, int fixed_point_position)
-{
- // Constants
- const qint16_t const_one = (1 << fixed_point_position);
- const qint16_t ln2 = ((0x58B9 >> (14 - fixed_point_position)) + 1) >> 1;
- const qint16_t inv_ln2 = (((0x38AA >> (14 - fixed_point_position)) + 1) >> 1) | const_one;
- const qint16_t A = ((0x7FBA >> (14 - fixed_point_position)) + 1) >> 1;
- const qint16_t B = ((0x3FE9 >> (14 - fixed_point_position)) + 1) >> 1;
- const qint16_t C = ((0x1693 >> (14 - fixed_point_position)) + 1) >> 1;
- const qint16_t D = ((0x0592 >> (14 - fixed_point_position)) + 1) >> 1;
-
- // Polynomial expansion
- const int dec_a = (sqmul_qs16(a, inv_ln2, fixed_point_position) >> fixed_point_position);
- const qint16_t alpha = sabs_qs16(sqsub_qs16(a, sqmul_qs16(ln2, sqshl_qs16(dec_a, fixed_point_position), fixed_point_position)));
- qint16_t sum = sqadd_qs16(sqmul_qs16(alpha, D, fixed_point_position), C);
- sum = sqadd_qs16(sqmul_qs16(alpha, sum, fixed_point_position), B);
- sum = sqadd_qs16(sqmul_qs16(alpha, sum, fixed_point_position), A);
- sum = sqmul_qs16(alpha, sum, fixed_point_position);
- sum = sqadd_qs16(sum, const_one);
-
- return (dec_a < 0) ? (sum >> -dec_a) : sqshl_qs16(sum, dec_a);
-}
-
-inline qint8_t slog_qs8(qint8_t a, int fixed_point_position)
-{
- // Constants
- qint8_t const_one = (1 << fixed_point_position);
- qint8_t ln2 = (0x58 >> (7 - fixed_point_position));
- qint8_t A = (0x5C >> (7 - fixed_point_position - 1));
- qint8_t B = -(0x56 >> (7 - fixed_point_position));
- qint8_t C = (0x29 >> (7 - fixed_point_position));
- qint8_t D = -(0x0A >> (7 - fixed_point_position));
-
- if((const_one == a) || (a < 0))
- {
- return 0;
- }
- else if(a < const_one)
- {
- return -slog_qs8(sdiv_qs8(const_one, a, fixed_point_position), fixed_point_position);
- }
-
- // Remove even powers of 2
- qint8_t shift_val = 31 - __builtin_clz(a >> fixed_point_position);
- a >>= shift_val;
- a = ssub_qs8(a, const_one);
-
- // Polynomial expansion
- qint8_t sum = sqadd_qs8(sqmul_qs8(a, D, fixed_point_position), C);
- sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), B);
- sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), A);
- sum = sqmul_qs8(a, sum, fixed_point_position);
-
- return smul_qs8(sadd_qs8(sum, shift_val << fixed_point_position), ln2, fixed_point_position);
-}
-
-inline qint16_t slog_qs16(qint16_t a, int fixed_point_position)
-{
- // Constants
- qint16_t const_one = (1 << fixed_point_position);
- qint16_t ln2 = (0x58B9 >> (7 - fixed_point_position));
- qint16_t A = (0x5C0F >> (7 - fixed_point_position - 1));
- qint16_t B = -(0x56AE >> (7 - fixed_point_position));
- qint16_t C = (0x2933 >> (7 - fixed_point_position));
- qint16_t D = -(0x0AA7 >> (7 - fixed_point_position));
-
- if((const_one == a) || (a < 0))
- {
- return 0;
- }
- else if(a < const_one)
- {
- return -slog_qs16(sdiv_qs16(const_one, a, fixed_point_position), fixed_point_position);
- }
-
- // Remove even powers of 2
- qint16_t shift_val = 31 - __builtin_clz(a >> fixed_point_position);
- a >>= shift_val;
- a = ssub_qs16(a, const_one);
-
- // Polynomial expansion
- qint16_t sum = sqadd_qs16(sqmul_qs16(a, D, fixed_point_position), C);
- sum = sqadd_qs16(sqmul_qs16(a, sum, fixed_point_position), B);
- sum = sqadd_qs16(sqmul_qs16(a, sum, fixed_point_position), A);
- sum = sqmul_qs16(a, sum, fixed_point_position);
-
- return smul_qs16(sadd_qs16(sum, shift_val << fixed_point_position), ln2, fixed_point_position);
-}
-
-inline float scvt_f32_qs8(qint8_t a, int fixed_point_position)
-{
- return static_cast<float>(a) / (1 << fixed_point_position);
-}
-
-inline qint8_t sqcvt_qs8_f32(float a, int fixed_point_position)
-{
- // round_nearest_integer(a * 2^(fixed_point_position))
- return utility::saturate_cast<qint8_t>(a * (1 << fixed_point_position) + ((a >= 0) ? 0.5 : -0.5));
-}
-
-inline float scvt_f32_qs16(qint16_t a, int fixed_point_position)
-{
- return static_cast<float>(a) / (1 << fixed_point_position);
-}
-
-inline qint16_t sqcvt_qs16_f32(float a, int fixed_point_position)
-{
- // round_nearest_integer(a * 2^(fixed_point_position))
- return utility::saturate_cast<qint16_t>(a * (1 << fixed_point_position) + ((a >= 0) ? 0.5 : -0.5));
-}
-
-inline qint8_t sqmovn_qs16(qint16_t a)
-{
- // Saturate the result in case of overflow and cast to qint8_t
- return utility::saturate_cast<qint8_t>(a);
-}
-
-inline qint16_t sqmovn_qs32(qint32_t a)
-{
- // Saturate the result in case of overflow and cast to qint16_t
- return utility::saturate_cast<qint16_t>(a);
-}
-}
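
To illustrate what the deleted scalar API provided, here is a small usage sketch of the removed functions (the wrapper name is illustrative, and this code no longer compiles after this patch):

    #include "arm_compute/core/FixedPoint.h" // deleted by this patch

    using namespace arm_compute;

    // Multiply 1.5 by 0.75 in Q5.3 (fixed_point_position = 3) and convert back.
    float removed_api_example()
    {
        const int     fpp = 3;
        const qint8_t a   = sqcvt_qs8_f32(1.5f, fpp);  // 1.5  -> raw 12
        const qint8_t b   = sqcvt_qs8_f32(0.75f, fpp); // 0.75 -> raw 6
        const qint8_t c   = sqmul_qs8(a, b, fpp);      // saturating multiply -> raw 9
        return scvt_f32_qs8(c, fpp);                   // 9 / 2^3 = 1.125
    }
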
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
index bf971a2729..fcbc3495c3 100644
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
@@ -66,7 +66,7 @@ public:
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
* 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+ * The rest are optional and used for representing batches. Data types supported: F16/F32.
* @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
* @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h
index a3cbfb94e3..374e36442b 100644
--- a/arm_compute/core/Helpers.h
+++ b/arm_compute/core/Helpers.h
@@ -582,21 +582,19 @@ inline void permute(TensorShape &shape, const PermutationVector &perm)
}
}
-/** Auto initialize the tensor info (shape, number of channels, data type and fixed point position) if the current assignment is empty.
+/** Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
*
- * @param[in,out] info Tensor info used to check and assign.
- * @param[in] shape New shape.
- * @param[in] num_channels New number of channels.
- * @param[in] data_type New data type
- * @param[in] fixed_point_position New fixed point position
- * @param[in] quantization_info (Optional) New quantization info
+ * @param[in,out] info Tensor info used to check and assign.
+ * @param[in] shape New shape.
+ * @param[in] num_channels New number of channels.
+ * @param[in] data_type New data type
+ * @param[in] quantization_info (Optional) New quantization info
*
* @return True if the tensor info has been initialized
*/
bool auto_init_if_empty(ITensorInfo &info,
const TensorShape &shape,
int num_channels, DataType data_type,
- int fixed_point_position,
QuantizationInfo quantization_info = QuantizationInfo());
/** Auto initialize the tensor info using another tensor info.
@@ -647,16 +645,6 @@ bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type);
*/
bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout);
-/** Set the fixed point position to the specified value if
- * the current fixed point position is 0 and the data type is QS8 or QS16
- *
- * @param[in,out] info Tensor info used to check and assign.
- * @param[in] fixed_point_position New fixed point position
- *
- * @return True if the fixed point position has been changed.
- */
-bool set_fixed_point_position_if_zero(ITensorInfo &info, int fixed_point_position);
-
/** Set the quantization info to the specified value if
* the current quantization info is empty and the data type of asymmetric quantized type
*
diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl
index b359811328..c0e4ab8d7d 100644
--- a/arm_compute/core/Helpers.inl
+++ b/arm_compute/core/Helpers.inl
@@ -202,7 +202,6 @@ inline bool auto_init_if_empty(ITensorInfo &info,
const TensorShape &shape,
int num_channels,
DataType data_type,
- int fixed_point_position,
QuantizationInfo quantization_info)
{
if(info.tensor_shape().total_size() == 0)
@@ -210,7 +209,6 @@ inline bool auto_init_if_empty(ITensorInfo &info,
info.set_data_type(data_type);
info.set_num_channels(num_channels);
info.set_tensor_shape(shape);
- info.set_fixed_point_position(fixed_point_position);
info.set_quantization_info(quantization_info);
return true;
}
@@ -225,7 +223,6 @@ inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_s
info_sink.set_data_type(info_source.data_type());
info_sink.set_num_channels(info_source.num_channels());
info_sink.set_tensor_shape(info_source.tensor_shape());
- info_sink.set_fixed_point_position(info_source.fixed_point_position());
info_sink.set_quantization_info(info_source.quantization_info());
info_sink.set_data_layout(info_source.data_layout());
return true;
@@ -278,17 +275,6 @@ inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout
return false;
}
-inline bool set_fixed_point_position_if_zero(ITensorInfo &info, int fixed_point_position)
-{
- if(info.fixed_point_position() == 0 && (info.data_type() == DataType::QS8 || info.data_type() == DataType::QS16))
- {
- info.set_fixed_point_position(fixed_point_position);
- return true;
- }
-
- return false;
-}
-
inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info)
{
if(info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type())))
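
Call sites of auto_init_if_empty() simply drop the fixed point argument. A minimal sketch of the updated call, with an illustrative function name and tensor shape:

    #include "arm_compute/core/Helpers.h"
    #include "arm_compute/core/TensorInfo.h"

    using namespace arm_compute;

    void auto_init_example()
    {
        TensorInfo        info;                  // empty: total tensor size is 0
        const TensorShape shape(16U, 16U, 3U);

        // Before: auto_init_if_empty(info, shape, 1, DataType::F32, 0 /* fixed_point_position */);
        auto_init_if_empty(info, shape, 1, DataType::F32);
    }
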
diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h
index ce0cf53fdf..f113445fb7 100644
--- a/arm_compute/core/ITensorInfo.h
+++ b/arm_compute/core/ITensorInfo.h
@@ -81,15 +81,6 @@ public:
* @return Reference to this ITensorInfo object
*/
virtual ITensorInfo &set_tensor_shape(const TensorShape &shape) = 0;
- /** Set the fixed point position to the specified value
- *
- * @warning The fixed point position must be set once the data type has been configured
- *
- * @param[in] fixed_point_position The new fixed point position
- *
- * @return Reference to this ITensorInfo object
- */
- virtual ITensorInfo &set_fixed_point_position(int fixed_point_position) = 0;
/** Set the quantization settings (scale and offset) of the tensor.
*
* @param[in] quantization_info QuantizationInfo containing the scale and offset
@@ -158,11 +149,7 @@ public:
* @return Offset in bytes from the beginning of the memory allocation to access the element (x, y, z, ...)
*/
virtual size_t offset_element_in_bytes(const Coordinates &pos) const = 0;
- /** Fixed point position used when the tensor data type is QS8 or QS16
- *
- * @return The fixed point position that expresses the number of bits for the fractional part of the number
- */
- virtual int fixed_point_position() const = 0;
+
/** Element size in bytes calculated as data_size() * num_channels()
*
* @return The size of one element in bytes
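
With fixed_point_position() gone from ITensorInfo, the remaining per-tensor scaling information lives in the quantization info. A hedged migration sketch, assuming QuantizationInfo still exposes a public scale field at this point in the codebase (the helper name is illustrative):

    #include "arm_compute/core/ITensorInfo.h"
    #include "arm_compute/core/Utils.h"

    using namespace arm_compute;

    // Assumption: QuantizationInfo has a public `scale` member here.
    float effective_scale(const ITensorInfo &info)
    {
        if(is_data_type_quantized_asymmetric(info.data_type()))
        {
            return info.quantization_info().scale;
        }
        return 1.f; // float / integer tensors need no extra scaling
    }
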
diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/NEON/NEFixedPoint.h
index 504ec6c444..ce64a8e58b 100644
--- a/arm_compute/core/NEON/NEFixedPoint.h
+++ b/arm_compute/core/NEON/NEFixedPoint.h
@@ -24,1194 +24,10 @@
#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__
#define __ARM_COMPUTE_NEFIXEDPOINT_H__
-#include "arm_compute/core/FixedPoint.h"
-
#include <arm_neon.h>
namespace arm_compute
{
-using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */
-using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */
-using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */
-using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */
-using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */
-using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */
-using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */
-using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */
-using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */
-using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */
-using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */
-using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */
-using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */
-using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */
-using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */
-using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */
-using qint32x2_t = int32x2_t; /**< 32 bit fixed point vector with 2 elements */
-using qint32x4_t = int32x4_t; /**< 32 bit fixed point vector with 4 elements */
-using qint32x4x2_t = int32x4x2_t; /**< 32 bit fixed point vector with 8 elements */
-
-/** Get the lower half of a 16 elements vector
- *
- * @param[in] a vector of 16 elements
- *
- * @return 8 bit fixed point vector (8 elements)
- */
-qint8x8_t vget_low_qs8(qint8x16_t a);
-
-/** Get the lower half of an 8 elements vector
- *
- * @param[in] a vector of 8 elements
- *
- * @return 16 bit fixed point vector (4 elements)
- */
-qint16x4_t vget_low_qs16(qint16x8_t a);
-
-/** Get the higher half of a 16 elements vector
- *
- * @param[in] a vector of 16 elements
- *
- * @return 8 bit fixed point vector (8 elements)
- */
-qint8x8_t vget_high_qs8(qint8x16_t a);
-
-/** Get the higher half of an 8 elements vector
- *
- * @param[in] a vector of 8 elements
- *
- * @return 16 bit fixed point vector (4 elements)
- */
-qint16x4_t vget_high_qs16(qint16x8_t a);
-
-/** Load a single 8 bit fixed point vector from memory (8 elements)
- *
- * @param[in] addr Memory address of the 8 bit fixed point vector to load
- *
- * @return 8 bit fixed point vector (8 elements)
- */
-qint8x8_t vld1_qs8(const qint8_t *addr);
-
-/** Load a single 16 bit fixed point vector from memory (4 elements)
- *
- * @param[in] addr Memory address of the 16 bit fixed point vector to load
- *
- * @return 16 bit fixed point vector (4 elements)
- */
-qint16x4_t vld1_qs16(const qint16_t *addr);
-
-/** Load a single 8 bit fixed point vector from memory (16 elements)
- *
- * @param[in] addr Memory address of the 8 bit fixed point vector to load
- *
- * @return 8 bit fixed point vector (16 elements)
- */
-qint8x16_t vld1q_qs8(const qint8_t *addr);
-
-/** Load a single 16 bit fixed point vector from memory (8 elements)
- *
- * @param[in] addr Memory address of the 16 bit fixed point vector to load
- *
- * @return 16 bit fixed point vector (8 elements)
- */
-qint16x8_t vld1q_qs16(const qint16_t *addr);
-
-/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements)
- *
- * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
- *
- * @return 8 bit fixed point vector (8 elements)
- */
-qint8x8_t vld1_dup_qs8(const qint8_t *addr);
-
-/** Load all lanes of 16 bit fixed point vector with same value from memory (4 elements)
- *
- * @param[in] addr Memory address of the 16 bit fixed point scalar value to load
- *
- * @return 16 bit fixed point vector (4 elements)
- */
-qint16x4_t vld1_dup_qs16(const qint16_t *addr);
-
-/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements)
- *
- * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
- *
- * @return 8 bit fixed point vector (16 elements)
- */
-qint8x16_t vld1q_dup_qs8(const qint8_t *addr);
-
-/** Load all lanes of 16 bit fixed point vector with same value from memory (8 elements)
- *
- * @param[in] addr Memory address of the 16 bit fixed point scalar value to load
- *
- * @return 16 bit fixed point vector (8 elements)
- */
-qint16x8_t vld1q_dup_qs16(const qint16_t *addr);
-
-/** Load two 16 bit fixed point vectors from memory (8x2 elements)
- *
- * @param[in] addr Memory address of the 16 bit fixed point vectors to load
- *
- * @return 16 bit fixed point vectors (8x2 elements)
- */
-qint16x8x2_t vld2q_qs16(qint16_t *addr);
-
-/** Store a single 8 bit fixed point vector to memory (8 elements)
- *
- * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
- * @param[in] b 8 bit fixed point vector to store
- *
- */
-void vst1_qs8(qint8_t *addr, qint8x8_t b);
-
-/** Store a single 16 bit fixed point vector to memory (4 elements)
- *
- * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
- * @param[in] b 16 bit fixed point vector to store
- *
- */
-void vst1_qs16(qint16_t *addr, qint16x4_t b);
-
-/** Store a single 8 bit fixed point vector to memory (16 elements)
- *
- * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
- * @param[in] b 8 bit fixed point vector to store
- *
- */
-void vst1q_qs8(qint8_t *addr, qint8x16_t b);
-
-/** Store a single 16 bit fixed point vector to memory (8 elements)
- *
- * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
- * @param[in] b 16 bit fixed point vector to store
- *
- */
-void vst1q_qs16(qint16_t *addr, qint16x8_t b);
-
-/** Store two 16 bit fixed point vector to memory (8x2 elements)
- *
- * @param[in] addr Memory address where the 16 bit fixed point vectors should be stored
- * @param[in] b 16 bit fixed point vectors to store
- *
- */
-void vst2q_qs16(qint16_t *addr, qint16x8x2_t b);
-
-/** 16 bit fixed point vector saturating narrow (8 elements)
- *
- * @param[in] a 16 bit fixed point vector to convert
- *
- * @return 8 bit fixed point vector
- */
-qint8x8_t vqmovn_q16(qint16x8_t a);
-
-/** 32 bit fixed point vector saturating narrow (4 elements)
- *
- * @param[in] a 32 bit fixed point vector to convert
- *
- * @return 16 bit fixed point vector
- */
-qint16x4_t vqmovn_q32(qint32x4_t a);
-
-/** 8 bit fixed point vector duplicate (8 elements)
- *
- * @param[in] a 8 bit fixed point to duplicate
- *
- * @return The result of the vector duplication
- */
-qint8x8_t vdup_n_qs8(qint8_t a);
-
-/** 16 bit fixed point vector duplicate (4 elements)
- *
- * @param[in] a 16 bit fixed point to duplicate
- *
- * @return The result of the vector duplication
- */
-qint16x4_t vdup_n_qs16(qint16_t a);
-
-/** 8 bit fixed point vector duplicate (16 elements)
- *
- * @param[in] a 8 bit fixed point to duplicate
- *
- * @return The result of the vector duplication
- */
-qint8x16_t vdupq_n_qs8(qint8_t a);
-
-/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
- *
- * @param[in] a floating point value to convert and duplicate
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the vector duplication
- */
-qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
-
-/** Duplicate a float and convert it to 16 bit fixed point vector (8 elements)
- *
- * @param[in] a floating point value to convert and duplicate
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the vector duplication
- */
-qint16x8_t vdupq_n_qs16_f32(float a, int fixed_point_position);
-
-/** 16 bit fixed point vector duplicate (8 elements)
- *
- * @param[in] a 16 bit fixed point to duplicate
- *
- * @return The result of the vector duplication
- */
-qint16x8_t vdupq_n_qs16(qint16_t a);
-
-/** Absolute value of 8 bit fixed point vector (8 elements)
- *
- * @param[in] a 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector absolute value
- */
-qint8x8_t vabs_qs8(qint8x8_t a);
-
-/** Absolute value of 16 bit fixed point vector (4 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector absolute value
- */
-qint16x4_t vabs_qs16(qint16x4_t a);
-
-/** Absolute value of 8 bit fixed point vector (16 elements)
- *
- * @param[in] a 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector absolute value
- */
-qint8x16_t vabsq_qs8(qint8x16_t a);
-
-/** Absolute value of 16 bit fixed point vector (8 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector absolute value
- */
-qint16x8_t vabsq_qs16(qint16x8_t a);
-
-/** Saturating absolute value of 8 bit fixed point vector (8 elements)
- *
- * @param[in] a 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector absolute value
- */
-qint8x8_t vqabs_qs8(qint8x8_t a);
-
-/** Saturating absolute value of 16 bit fixed point vector (4 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector absolute value
- */
-qint16x4_t vqabs_qs16(qint16x4_t a);
-
-/** Saturating absolute value of 8 bit fixed point vector (16 elements)
- *
- * @param[in] a 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector absolute value
- */
-qint8x16_t vqabsq_qs8(qint8x16_t a);
-
-/** Saturating absolute value of 16 bit fixed point vector (8 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector absolute value
- */
-qint16x8_t vqabsq_qs16(qint16x8_t a);
-
-/** 8 bit fixed point vector max (8 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector max operation
- */
-qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b);
-
-/** 16 bit fixed point vector max (4 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector max operation
- */
-qint16x4_t vmax_qs16(qint16x4_t a, qint16x4_t b);
-
-/** 8 bit fixed point vector max (16 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector max operation
- */
-qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b);
-
-/** 16 bit fixed point vector max (8 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector max operation
- */
-qint16x8_t vmaxq_qs16(qint16x8_t a, qint16x8_t b);
-
-/** 8 bit fixed point vector pairwise max (8 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector pairwise max operation
- */
-qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b);
-
-/** 16 bit fixed point vector pairwise max (4 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector pairwise max operation
- */
-qint16x4_t vpmax_qs16(qint16x4_t a, qint16x4_t b);
-
-/** 8 bit fixed point vector min (8 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector min operation
- */
-qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b);
-
-/** 16 bit fixed point vector min (4 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector min operation
- */
-qint16x4_t vmin_qs16(qint16x4_t a, qint16x4_t b);
-
-/** 8 bit fixed point vector min (16 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector min operation
- */
-qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b);
-
-/** 16 bit fixed point vector min (8 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector min operation
- */
-qint16x8_t vminq_qs16(qint16x8_t a, qint16x8_t b);
-
-/** 8 bit fixed point vector pairwise min (8 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector pairwise min operation
- */
-qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b);
-
-/** 16 bit fixed point vector pairwise min (4 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector pairwise min operation
- */
-qint16x4_t vpmin_qs16(qint16x4_t a, qint16x4_t b);
-
-/** 8 bit fixed point vector add (8 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector addition
- */
-qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b);
-
-/** 16 bit fixed point vector add (4 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector addition
- */
-qint16x4_t vadd_qs16(qint16x4_t a, qint16x4_t b);
-
-/** 8 bit fixed point vector add (16 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector addition
- */
-qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b);
-
-/** 16 bit fixed point vector add (8 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector addition
- */
-qint16x8_t vaddq_qs16(qint16x8_t a, qint16x8_t b);
-
-/** 8 bit fixed point vector saturating add (8 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
- */
-qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b);
-
-/** 16 bit fixed point vector saturating add (4 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
- */
-qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
-
-/** 8 bit fixed point vector saturating add (16 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
- */
-qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b);
-
-/** 16 bit fixed point vector saturating add (8 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
- */
-qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b);
-
-/** 8 bit fixed point vector pairwise add long (8 elements)
- *
- * @param[in] a 8 bit fixed point input vector
- *
- * @return The result of the pairwise addition: adjacent pairs of 8 bit elements are added into 16 bit fixed point results, so the operation cannot overflow
- */
-int16x4_t vpaddl_qs8(qint8x8_t a);
-
-/** 8 bit fixed point vector subtraction (8 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector subtraction
- */
-qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b);
-
-/** 16 bit fixed point vector subtraction (4 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector subtraction
- */
-qint16x4_t vsub_qs16(qint16x4_t a, qint16x4_t b);
-
-/** 8 bit fixed point vector subtraction (16 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector subtraction
- */
-qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b);
-
-/** 16 bit fixed point vector subtraction (8 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector subtraction
- */
-qint16x8_t vsubq_qs16(qint16x8_t a, qint16x8_t b);
-
-/** 8 bit fixed point vector saturating subtraction (8 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
- */
-qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b);
-
-/** 16 bit fixed point vector saturating subtraction (4 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
- */
-qint16x4_t vqsub_qs16(qint16x4_t a, qint16x4_t b);
-
-/** 8 bit fixed point vector saturating subtraction (16 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- *
- * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
- */
-qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b);
-
-/** 16 bit fixed point vector saturating subtraction (8 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- *
- * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
- */
-qint16x8_t vqsubq_qs16(qint16x8_t a, qint16x8_t b);
-
-/** 8 bit fixed point vector multiply (8 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point vector multiplication.
- */
-qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
-
-/** 16 bit fixed point vector multiply (4 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point vector multiplication.
- */
-qint16x4_t vmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
-
-/** 8 bit fixed point vector multiply (16 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point vector multiplication.
- */
-qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
-
-/** 16 bit fixed point vector multiply (8 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point vector multiplication.
- */
-qint16x8_t vmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
-
-/** 8 bit fixed point vector saturating multiply (8 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
- */
-qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
-
-/** 16 bit fixed point vector saturating multiply (4 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
- */
-qint16x4_t vqmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
-
-/** 8 bit fixed point vector saturating multiply (16 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
- */
-qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
-
-/** 16 bit fixed point vector saturating multiply (8 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
- */
-qint16x8_t vqmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
-
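Note (editorial sketch): all the multiply variants take fixed_point_position because the product of two values with n fractional bits carries 2n fractional bits; the implementations in NEFixedPoint.inl (shown later in this patch) widen the operands, add a rounding constant of 1 << (n - 1), shift right by n and narrow, with saturation for the vq* variants. A minimal scalar sketch of the same idea follows, using a hypothetical helper name rather than the library's NEON path:

    #include <algorithm>
    #include <cstdint>

    // Scalar sketch of a saturating Q-format multiply (hypothetical helper, not part of the library).
    int8_t sqmul_qs8(int8_t a, int8_t b, int fixed_point_position)
    {
        // Widen so the full double-width product fits.
        int32_t res = static_cast<int32_t>(a) * static_cast<int32_t>(b);

        // Round to nearest by adding half the weight of the bits about to be dropped,
        // then shift right by fixed_point_position to return to the input Q format.
        res = (res + (1 << (fixed_point_position - 1))) >> fixed_point_position;

        // Saturate instead of wrapping, as the vq* variants do.
        return static_cast<int8_t>(std::min<int32_t>(std::max<int32_t>(res, INT8_MIN), INT8_MAX));
    }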
-/** 8 bit fixed point vector long multiply (8 elements)
- *
- * @param[in] a First 8 bit fixed point input vector
- * @param[in] b Second 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point long vector multiplication.
- */
-qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
-
-/** 16 bit fixed point vector long multiply (4 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 32 bit fixed point long vector multiplication.
- */
-qint32x4_t vmull_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
-
-/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
- *
- * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
- * @param[in] b Second 8 bit fixed point input vector
- * @param[in] c Third 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point vector multiply-accumulate
- */
-qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
-
-/** 16 bit fixed point vector multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
- *
- * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] c Third 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point vector multiply-accumulate
- */
-qint16x4_t vmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
-
-/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
- *
- * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
- * @param[in] b Second 8 bit fixed point input vector
- * @param[in] c Third 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point vector multiply-accumulate
- */
-qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
-
-/** 16 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
- *
- * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] c Third 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point vector multiply-accumulate
- */
-qint16x8_t vmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
-
-/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
- *
- * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
- * @param[in] b Second 8 bit fixed point input vector
- * @param[in] c Third 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
- */
-qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
-
-/** 16 bit fixed point vector saturating multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
- *
- * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] c Third 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
- */
-qint16x4_t vqmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
-
-/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
- *
- * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
- * @param[in] b Second 8 bit fixed point input vector
- * @param[in] c Third 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
- */
-qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
-
-/** 16 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
- *
- * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] c Third 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
- */
-qint16x8_t vqmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
-
-/** 8 bit fixed point vector multiply-accumulate long (8 elements).
- * This operation performs the product between @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
- *
- * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
- * @param[in] b Second 8 bit fixed point input vector
- * @param[in] c Third 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point vector multiply-accumulate long
- */
-qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
-
-/** 16 bit fixed point vector multiply-accumulate long (4 elements).
- * This operation performs the product between @p b and @p c and adds the result to the 32 bit fixed point vector @p a (a + b * c). 4 elements
- *
- * @param[in] a First 32 bit fixed point input vector where the result of multiplication must be added to
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] c Third 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point vector multiply-accumulate long
- */
-qint32x4_t vmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
-
-/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector.
- * This operation performs the product between @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
- *
- * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
- * @param[in] b Second 8 bit fixed point input vector
- * @param[in] c Third 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8 bit fixed point vector multiply-accumulate long
- */
-qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
-
-/** 16 bit fixed point vector saturating multiply-accumulate long (4 elements). The saturation is performed on the 32 bit fixed point output vector.
- * This operation performs the product between @p b and @p c and adds the result to the 32 bit fixed point vector @p a (a + b * c). 4 elements
- *
- * @param[in] a First 32 bit fixed point input vector where the result of multiplication must be added to
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] c Third 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit fixed point vector multiply-accumulate long
- */
-qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
-
-/** Convert a float vector with 4x2 elements to 8 bit fixed point vector with 8 elements
- *
- * @param[in] a Float input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
- */
-qint8x8_t vqcvt_qs8_f32(const float32x4x2_t a, int fixed_point_position);
-
-/** Convert a float vector with 4 elements to 16 bit fixed point vector with 4 elements
- *
- * @param[in] a Float input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
- */
-qint16x4_t vqcvt_qs16_f32(const float32x4_t a, int fixed_point_position);
-
-/** Convert a float vector with 4x4 elements to 8 bit fixed point vector with 16 elements
- *
- * @param[in] a Float input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
- */
-qint8x16_t vqcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position);
-
-/** Convert a float vector with 4x2 elements to 16 bit fixed point vector with 8 elements
- *
- * @param[in] a Float input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
- */
-qint16x8_t vqcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position);
-
-/** Convert an 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements
- *
- * @param[in] a 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the conversion 8 bit fixed point -> float32x4x2
- */
-float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position);
-
-/** Convert a 16 bit fixed point vector with 4 elements to a float vector with 4 elements
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the conversion 16 bit fixed point -> float32x4
- */
-float32x4_t vcvt_f32_qs16(qint16x4_t a, int fixed_point_position);
-
-/** Convert an 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements
- *
- * @param[in] a 8 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the conversion 8 bit fixed point -> float32x4x4
- */
-float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position);
-
-/** Convert a 16 bit fixed point vector with 8 elements to a float vector with 4x2 elements
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the conversion 16 bit fixed point -> float32x4x2
- */
-float32x4x2_t vcvtq_qs16_f32(qint16x8_t a, int fixed_point_position);
-
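Note (editorial sketch): the conversions above scale by 2^fixed_point_position; float to fixed multiplies, rounds and saturates, while fixed to float divides. A scalar sketch under those assumptions, with hypothetical helper names rather than the NEON implementations:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Hypothetical scalar equivalents of the vqcvt/vcvt helpers declared above.
    int8_t scvt_qs8_f32(float a, int fixed_point_position)
    {
        // Scale by 2^n, round to nearest, then saturate to the 8 bit range.
        const int32_t rounded = static_cast<int32_t>(std::lround(a * static_cast<float>(1 << fixed_point_position)));
        return static_cast<int8_t>(std::min<int32_t>(std::max<int32_t>(rounded, INT8_MIN), INT8_MAX));
    }

    float scvt_f32_qs8(int8_t a, int fixed_point_position)
    {
        // Divide by 2^n to recover the real value.
        return static_cast<float>(a) / static_cast<float>(1 << fixed_point_position);
    }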
-/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (8 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit reciprocal (1/a).
- */
-qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position);
-
-/** Calculate reciprocal of a fixed point 16 bit number using the Newton-Raphson method. (4 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit reciprocal (1/a).
- */
-qint16x4_t vrecip_qs16(qint16x4_t a, int fixed_point_position);
-
-/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (16 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit reciprocal (1/a).
- */
-qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position);
-
-/** Calculate reciprocal of a fixed point 16 bit number using the Newton-Raphson method. (8 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit reciprocal (1/a).
- */
-qint16x8_t vrecipq_qs16(qint16x8_t a, int fixed_point_position);
-
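Note (editorial sketch): the reciprocal helpers above are documented as Newton-Raphson; each iteration refines an estimate x with x = x * (2 - a * x). A float sketch of that recurrence (the removed kernels run the same iteration in Q-format arithmetic):

    // Newton-Raphson reciprocal sketch; converges for 0 < a < 2 with this naive starting point.
    float nr_reciprocal(float a, int iterations = 3)
    {
        float x = 1.0f; // naive initial estimate, refined below
        for(int i = 0; i < iterations; ++i)
        {
            x = x * (2.0f - a * x); // the error roughly squares on every iteration
        }
        return x;
    }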
-/** Division fixed point 8bit (8 elements)
- *
- * @param[in] a First 8bit fixed point input vector
- * @param[in] b Second 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The quotient in 8 bit fixed point format.
- */
-qint8x8_t vdiv_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
-
-/** Division fixed point 16 bit (4 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The quotient in 16 bit fixed point format.
- */
-qint16x4_t vdiv_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
-
-/** Division fixed point 8bit (16 elements)
- *
- * @param[in] a First 8bit fixed point input vector
- * @param[in] b Second 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The quotient in 8 bit fixed point format.
- */
-qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
-
-/** Division fixed point 16 bit (8 elements)
- *
- * @param[in] a First 16 bit fixed point input vector
- * @param[in] b Second 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The quotient in 16 bit fixed point format.
- */
-qint16x8_t vdivq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
-
-/** Perform a 4th degree polynomial approximation. (8 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit Taylor approximation.
- */
-template <bool islog>
-qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position);
-
-/** Perform a 4th degree polynomial approximation. (4 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit Taylor approximation.
- */
-template <bool islog>
-qint16x4_t vtaylor_poly_qs16(qint16x4_t a, int fixed_point_position);
-
-/** Perform a 4th degree polynomial approximation. (16 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit Taylor approximation.
- */
-template <bool islog>
-qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position);
-
-/** Perform a 4th degree polynomial approximation. (8 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit Taylor approximation.
- */
-template <bool islog>
-qint16x8_t vtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position);
-
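Note (editorial sketch): a 4th-degree polynomial approximation of this kind is normally evaluated in Horner form so each step is one multiply-accumulate. A float sketch with placeholder coefficients follows; the removed kernels work on Q-format values using the coefficient tables defined in NEFixedPoint.inl later in this patch:

    // Horner evaluation of c0 + c1*x + c2*x^2 + c3*x^3 + c4*x^4 (placeholder coefficients).
    float poly4(float x, const float (&c)[5])
    {
        float acc = c[4];
        for(int i = 3; i >= 0; --i)
        {
            acc = acc * x + c[i]; // one multiply-accumulate per coefficient
        }
        return acc;
    }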
-/** Calculate saturating exponential fixed point 8bit (8 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit saturating exponential
- */
-qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position);
-
-/** Calculate saturating exponential fixed point 16 bit (4 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit saturating exponential
- */
-qint16x4_t vqexp_qs16(qint16x4_t a, int fixed_point_position);
-
-/** Calculate saturating exponential fixed point 8bit (16 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit saturating exponential
- */
-qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position);
-
-/** Calculate saturating exponential fixed point 16 bit (8 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit saturating exponential
- */
-qint16x8_t vqexpq_qs16(qint16x8_t a, int fixed_point_position);
-
-/** Calculate logarithm fixed point 8 bit (8 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit logarithm.
- */
-qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position);
-
-/** Calculate logarithm fixed point 16 bit (4 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit logarithm.
- */
-qint16x4_t vlog_qs16(qint16x4_t a, int fixed_point_position);
-
-/** Calculate logarithm fixed point 8 bit (16 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit logarithm.
- */
-qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position);
-
-/** Calculate logarithm fixed point 16 bit (8 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit logarithm.
- */
-qint16x8_t vlogq_qs16(qint16x8_t a, int fixed_point_position);
-
-/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit inverse sqrt.
- */
-qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
-
-/** Calculate inverse square root for fixed point 16 bit using Newton-Raphson method (4 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit inverse sqrt.
- */
-qint16x4_t vinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
-
-/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit inverse sqrt.
- */
-qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
-
-/** Calculate saturating inverse square root for fixed point 16 bit using Newton-Raphson method (4 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit inverse sqrt.
- */
-qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
-
-/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit inverse sqrt.
- */
-qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
-
-/** Calculate inverse square root for fixed point 16 bit using Newton-Raphson method (8 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit inverse sqrt.
- */
-qint16x8_t vinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
-
-/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit inverse sqrt.
- */
-qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
-
-/** Calculate saturating inverse square root for fixed point 16 bit using Newton-Raphson method (8 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16 bit inverse sqrt.
- */
-qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
-
-/** Calculate hyperbolic tangent for fixed point 8bit (8 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The calculated Hyperbolic Tangent.
- */
-qint8x8_t vqtanh_qs8(qint8x8_t a, int fixed_point_position);
-
-/** Calculate hyperbolic tangent for fixed point 16 bit (4 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The calculated Hyperbolic Tangent.
- */
-qint16x4_t vqtanh_qs16(qint16x4_t a, int fixed_point_position);
-
-/** Calculate hyperbolic tangent for fixed point 8bit (16 elements)
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The calculated Hyperbolic Tangent.
- */
-qint8x16_t vqtanhq_qs8(qint8x16_t a, int fixed_point_position);
-
-/** Calculate hyperbolic tangent for fixed point 16bit (8 elements)
- *
- * @param[in] a 16 bit fixed point input vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The calculated Hyperbolic Tangent.
- */
-qint16x8_t vqtanhq_qs16(qint16x8_t a, int fixed_point_position);
-
-/** Calculate saturating n power for fixed point 8bit (16 elements).
- *
- * pow(a,b) = e^(b*log(a))
- *
- * @param[in] a 8bit fixed point input vector
- * @param[in] b 8bit fixed point power vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 8bit power.
- */
-qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
-
-/** Calculate saturating n power for fixed point 16bit (8 elements).
- *
- * pow(a,b) = e^(b*log(a))
- *
- * @param[in] a 16bit fixed point input vector
- * @param[in] b 16bit fixed point power vector
- * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
- *
- * @return The result of the 16bit power.
- */
-qint16x8_t vqpowq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
-
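Note (editorial sketch): given the identity pow(a, b) = e^(b * log(a)) quoted above, a natural composition of the helpers declared in this header is sketched below; this is illustrative only and has not been verified against the removed implementation body:

    // Sketch: fixed point power via the exp/log identity, using declarations from this header.
    qint8x16_t pow_qs8_sketch(qint8x16_t a, qint8x16_t b, int fixed_point_position)
    {
        const qint8x16_t log_a = vlogq_qs8(a, fixed_point_position);          // log(a)
        const qint8x16_t blog  = vqmulq_qs8(b, log_a, fixed_point_position);  // b * log(a)
        return vqexpq_qs8(blog, fixed_point_position);                        // e^(b * log(a)), saturating
    }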
/** Compute lane-by-lane maximum between elements of a float vector with 4x2 elements
*
* @param[in] a Float input vector
diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl
index b86c3cbec3..14e51d825c 100644
--- a/arm_compute/core/NEON/NEFixedPoint.inl
+++ b/arm_compute/core/NEON/NEFixedPoint.inl
@@ -26,1965 +26,7 @@
namespace arm_compute
{
-/** Exponent polynomial coefficients for 8 bit fixed point (8 elements)
- * Format is in Q0.7 for all elements
- */
-static const std::array<qint8x8_t, 4> exp_tab_qs8 =
-{
- {
- vdup_n_s8(0x7F), // 0.9978546
- vdup_n_s8(0x3F), // 0.4994721
- vdup_n_s8(0x16), // 0.1763723
- vdup_n_s8(0x05), // 0.0435108
- }
-};
-
-/** Exponent polynomial coefficients for 16 bit fixed point (4 elements)
- * Format is in Q0.15 for all elements
- */
-static const std::array<qint16x4_t, 4> exp_tab_qs16 =
-{
- {
- vdup_n_s16(0x7FBA), // 0.9978546
- vdup_n_s16(0x3FE9), // 0.4994721
- vdup_n_s16(0x1693), // 0.1763723
- vdup_n_s16(0x0592), // 0.0435108
- }
-};
-
-/** Exponent polynomial coefficients for 8 bit fixed point (16 elements)
- * Format is in Q0.7 for all elements
- */
-static const std::array<qint8x16_t, 4> exp_tabq_qs8 =
-{
- {
- vdupq_n_s8(0x7F), // 0.9978546
- vdupq_n_s8(0x3F), // 0.4994721
- vdupq_n_s8(0x16), // 0.1763723
- vdupq_n_s8(0x05), // 0.0435108
- }
-};
-
-/** Exponent polynomial coefficients for 16 bit fixed point (8 elements)
- * Format is in Q0.15 for all elements
- */
-static const std::array<qint16x8_t, 4> exp_tabq_qs16 =
-{
- {
- vdupq_n_s16(0x7FBA), // 0.9978546
- vdupq_n_s16(0x3FE9), // 0.4994721
- vdupq_n_s16(0x1693), // 0.1763723
- vdupq_n_s16(0x0592), // 0.0435108
- }
-};
-
-/** Logarithm polynomial coefficients for 8 bit fixed point (8 elements)
- * Format is in Q0.7 for all elements except the first one which is in Q1.6
- */
-static const std::array<qint8x8_t, 4> log_tab_qs8 =
-{
- {
- vdup_n_s8(0x5C), // 1.4384189
- vdup_n_s8(-0x56), // -0.6771900
- vdup_n_s8(0x29), // 0.3218538
- vdup_n_s8(-0x0A), // -0.0832229
- }
-};
-
-/** Logarithm polynomial coefficients for 16 bit fixed point (4 elements)
- * Format is in Q0.15 for all elements except the first one which is in Q1.14
- */
-static const std::array<qint16x4_t, 4> log_tab_qs16 =
-{
- {
- vdup_n_s16(0x5C0F), // 1.4384189
- vdup_n_s16(-0x56AE), // -0.6771900
- vdup_n_s16(0x2933), // 0.3218538
- vdup_n_s16(-0x0AA7), // -0.0832229
- }
-};
-
-/** Logarithm polynomial coefficients for 8 bit fixed point (16 elements)
- * Format is in Q0.7 for all elements except the first one which is in Q1.6
- */
-static const std::array<qint8x16_t, 4> log_tabq_qs8 =
-{
- {
- vdupq_n_s8(0x5C), // 1.4384189
- vdupq_n_s8(-0x56), // -0.6771900
- vdupq_n_s8(0x29), // 0.3218538
- vdupq_n_s8(-0x0A), // -0.0832229
- }
-};
-
-/** Logarithm polynomial coefficients for 16 bit fixed point (8 elements)
- * Format is in Q0.15 for all elements except the first one which is in Q1.14
- */
-static const std::array<qint16x8_t, 4> log_tabq_qs16 =
-{
- {
- vdupq_n_s16(0x5C0F), // 1.4384189
- vdupq_n_s16(-0x56AE), // -0.6771900
- vdupq_n_s16(0x2933), // 0.3218538
- vdupq_n_s16(-0x0AA7), // -0.0832229
- }
-};
-
#ifndef DOXYGEN_SKIP_THIS
-inline qint8x8_t vget_low_qs8(qint8x16_t a)
-{
- return vget_low_s8(a);
-}
-
-inline qint16x4_t vget_low_qs16(qint16x8_t a)
-{
- return vget_low_s16(a);
-}
-
-inline qint8x8_t vget_high_qs8(qint8x16_t a)
-{
- return vget_high_s8(a);
-}
-
-inline qint16x4_t vget_high_qs16(qint16x8_t a)
-{
- return vget_high_s16(a);
-}
-
-inline qint8x8_t vld1_qs8(const qint8_t *addr)
-{
- return vld1_s8(addr);
-}
-
-inline qint16x4_t vld1_qs16(const qint16_t *addr)
-{
- return vld1_s16(addr);
-}
-
-inline qint8x16_t vld1q_qs8(const qint8_t *addr)
-{
- return vld1q_s8(addr);
-}
-
-inline qint16x8_t vld1q_qs16(const qint16_t *addr)
-{
- return vld1q_s16(addr);
-}
-
-inline qint8x8_t vld1_dup_qs8(const qint8_t *addr)
-{
- return vld1_dup_s8(addr);
-}
-
-inline qint16x4_t vld1_dup_qs16(const qint16_t *addr)
-{
- return vld1_dup_s16(addr);
-}
-
-inline qint8x16_t vld1q_dup_qs8(const qint8_t *addr)
-{
- return vld1q_dup_s8(addr);
-}
-
-inline qint16x8_t vld1q_dup_qs16(const qint16_t *addr)
-{
- return vld1q_dup_s16(addr);
-}
-
-inline qint16x8x2_t vld2q_qs16(const qint16_t *addr)
-{
- return vld2q_s16(addr);
-}
-
-inline void vst1_qs8(qint8_t *addr, qint8x8_t b)
-{
- vst1_s8(addr, b);
-}
-
-inline void vst1_qs16(qint16_t *addr, qint16x4_t b)
-{
- vst1_s16(addr, b);
-}
-
-inline void vst1q_qs8(qint8_t *addr, qint8x16_t b)
-{
- vst1q_s8(addr, b);
-}
-
-inline void vst1q_qs16(qint16_t *addr, qint16x8_t b)
-{
- vst1q_s16(addr, b);
-}
-
-inline void vst2q_qs16(qint16_t *addr, qint16x8x2_t b)
-{
- vst2q_s16(addr, b);
-}
-
-inline qint8x8_t vqmovn_qs16(qint16x8_t a)
-{
- return vqmovn_s16(a);
-}
-
-inline qint16x4_t vqmovn_qs32(qint32x4_t a)
-{
- return vqmovn_s32(a);
-}
-
-inline qint8x8_t vdup_n_qs8(qint8_t a)
-{
- return vdup_n_s8(a);
-}
-
-inline qint16x4_t vdup_n_qs16(qint16_t a)
-{
- return vdup_n_s16(a);
-}
-
-inline qint8x16_t vdupq_n_qs8(qint8_t a)
-{
- return vdupq_n_s8(a);
-}
-
-inline qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position)
-{
- float32x4x4_t res =
- {
- {
- vdupq_n_f32(a),
- vdupq_n_f32(a),
- vdupq_n_f32(a),
- vdupq_n_f32(a),
- }
- };
- return vqcvtq_qs8_f32(res, fixed_point_position);
-}
-
-inline qint16x8_t vdupq_n_qs16_f32(float a, int fixed_point_position)
-{
- float32x4x2_t res =
- {
- {
- vdupq_n_f32(a),
- vdupq_n_f32(a),
- }
- };
- return vqcvtq_qs16_f32(res, fixed_point_position);
-}
-
-inline qint16x8_t vdupq_n_qs16(qint16_t a)
-{
- return vdupq_n_s16(a);
-}
-
-inline qint32x4_t vdupq_n_qs32(qint32_t a)
-{
- return vdupq_n_s32(a);
-}
-
-inline qint8x8_t vabs_qs8(qint8x8_t a)
-{
- return vabs_s8(a);
-}
-
-inline qint16x4_t vabs_qs16(qint16x4_t a)
-{
- return vabs_s16(a);
-}
-
-inline qint8x16_t vabsq_qs8(qint8x16_t a)
-{
- return vabsq_s8(a);
-}
-
-inline qint16x8_t vabsq_qs16(qint16x8_t a)
-{
- return vabsq_s16(a);
-}
-
-inline qint8x8_t vqabs_qs8(qint8x8_t a)
-{
- return vqabs_s8(a);
-}
-
-inline qint16x4_t vqabs_qs16(qint16x4_t a)
-{
- return vqabs_s16(a);
-}
-
-inline qint8x16_t vqabsq_qs8(qint8x16_t a)
-{
- return vqabsq_s8(a);
-}
-
-inline qint16x8_t vqabsq_qs16(qint16x8_t a)
-{
- return vqabsq_s16(a);
-}
-
-inline qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b)
-{
- return vmax_s8(a, b);
-}
-
-inline qint16x4_t vmax_qs16(qint16x4_t a, qint16x4_t b)
-{
- return vmax_s16(a, b);
-}
-
-inline qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b)
-{
- return vmaxq_s8(a, b);
-}
-
-inline qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b)
-{
- return vpmax_s8(a, b);
-}
-
-inline qint16x4_t vpmax_qs16(qint16x4_t a, qint16x4_t b)
-{
- return vpmax_s16(a, b);
-}
-
-inline qint16x8_t vmaxq_qs16(qint16x8_t a, qint16x8_t b)
-{
- return vmaxq_s16(a, b);
-}
-
-inline qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b)
-{
- return vmin_s8(a, b);
-}
-
-inline qint16x4_t vmin_qs16(qint16x4_t a, qint16x4_t b)
-{
- return vmin_s16(a, b);
-}
-
-inline qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b)
-{
- return vminq_s8(a, b);
-}
-
-inline qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b)
-{
- return vpmin_s8(a, b);
-}
-
-inline qint16x4_t vpmin_qs16(qint16x4_t a, qint16x4_t b)
-{
- return vpmin_s16(a, b);
-}
-
-inline qint16x8_t vminq_qs16(qint16x8_t a, qint16x8_t b)
-{
- return vminq_s16(a, b);
-}
-
-inline qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b)
-{
- return vadd_s8(a, b);
-}
-
-inline qint16x4_t vadd_qs16(qint16x4_t a, qint16x4_t b)
-{
- return vadd_s16(a, b);
-}
-
-inline qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b)
-{
- return vaddq_s8(a, b);
-}
-
-inline qint16x8_t vaddq_qs16(qint16x8_t a, qint16x8_t b)
-{
- return vaddq_s16(a, b);
-}
-
-inline qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b)
-{
- return vqadd_s8(a, b);
-}
-
-inline qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b)
-{
- return vqadd_s16(a, b);
-}
-
-inline qint32x2_t vqadd_qs32(qint32x2_t a, qint32x2_t b)
-{
- return vqadd_s32(a, b);
-}
-
-inline qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b)
-{
- return vqaddq_s8(a, b);
-}
-
-inline qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b)
-{
- return vqaddq_s16(a, b);
-}
-
-inline qint32x4_t vqaddq_qs32(qint32x4_t a, qint32x4_t b)
-{
- return vqaddq_s32(a, b);
-}
-
-inline int16x4_t vpaddl_qs8(qint8x8_t a)
-{
- return vpaddl_s8(a);
-}
-
-inline qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b)
-{
- return vsub_s8(a, b);
-}
-
-inline qint16x4_t vsub_qs16(qint16x4_t a, qint16x4_t b)
-{
- return vsub_s16(a, b);
-}
-
-inline qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b)
-{
- return vsubq_s8(a, b);
-}
-
-inline qint16x8_t vsubq_qs16(qint16x8_t a, qint16x8_t b)
-{
- return vsubq_s16(a, b);
-}
-
-inline qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b)
-{
- return vqsub_s8(a, b);
-}
-
-inline qint16x4_t vqsub_qs16(qint16x4_t a, qint16x4_t b)
-{
- return vqsub_s16(a, b);
-}
-
-inline qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b)
-{
- return vqsubq_s8(a, b);
-}
-
-inline qint16x8_t vqsubq_qs16(qint16x8_t a, qint16x8_t b)
-{
- return vqsubq_s16(a, b);
-}
-
-inline qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position)
-{
- const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position);
-
- // Initialize the temporary result with a constant used to round up the result
- qint16x8_t res = vdupq_n_s16(1 << (fixed_point_position - 1));
-
- // Vector multiply-accumulate long
- res = vmlal_s8(res, a, b);
-
- // Shift right by fixed_point_position
- res = vshlq_s16(res, fixed_point_position_s16);
-
- // Convert back to qint8
- return vmovn_s16(res);
-}
-
-inline qint16x4_t vmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position)
-{
- const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position);
-
- // Initialize the temporary result with a constant used to round up the result
- qint32x4_t res = vdupq_n_s32(1 << (fixed_point_position - 1));
-
- // Vector multiply-accumulate long
- res = vmlal_s16(res, a, b);
-
- // Shift right by fixed_point_position
- res = vshlq_s32(res, fixed_point_position_s32);
-
- // Convert back to qint16
- return vmovn_s32(res);
-}
-
-inline qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position)
-{
- const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint16x8_t res0 = vdupq_n_s16(1 << (fixed_point_position - 1));
- qint16x8_t res1 = res0;
-
- // Vector multiply-accumulate long
- res0 = vmlal_s8(res0, vget_low_s8(a), vget_low_s8(b));
- res1 = vmlal_s8(res1, vget_high_s8(a), vget_high_s8(b));
-
- // Shift right by fixed_point_position
- res0 = vshlq_s16(res0, fixed_point_position_s16);
- res1 = vshlq_s16(res1, fixed_point_position_s16);
-
- // Convert back to qint8
- return vcombine_s8(vmovn_s16(res0), vmovn_s16(res1));
-}
-
-inline qint16x8_t vmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position)
-{
- const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint32x4_t res0 = vdupq_n_s32(1 << (fixed_point_position - 1));
- qint32x4_t res1 = res0;
-
- // Vector multiply-accumulate long
- res0 = vmlal_s16(res0, vget_low_qs16(a), vget_low_qs16(b));
- res1 = vmlal_s16(res1, vget_high_qs16(a), vget_high_qs16(b));
-
- // Shift right by fixed_point_position
- res0 = vshlq_s32(res0, fixed_point_position_s32);
- res1 = vshlq_s32(res1, fixed_point_position_s32);
-
- // Convert back to qint16
- return vcombine_s16(vmovn_s32(res0), vmovn_s32(res1));
-}
-
-inline qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position)
-{
- const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position);
-
- // Initialize the temporary result with a constant used to round up the result
- qint16x8_t res = vdupq_n_s16(1 << (fixed_point_position - 1));
-
- // Vector multiply-accumulate long
- res = vmlal_s8(res, a, b);
-
- // Shift right by fixed_point_position
- res = vqshlq_s16(res, fixed_point_position_s16);
-
- // Convert back to qint8 and saturate
- return vqmovn_s16(res);
-}
-
-inline qint16x4_t vqmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position)
-{
- const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position);
-
- // Initialize the temporary result with a constant used to round up the result
- qint32x4_t res = vdupq_n_s32(1 << (fixed_point_position - 1));
-
- // Vector multiply-accumulate long
- res = vmlal_s16(res, a, b);
-
- // Shift right by fixed_point_position
- res = vqshlq_s32(res, fixed_point_position_s32);
-
- // Convert back to qint16 and saturate
- return vqmovn_s32(res);
-}
-
-inline qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position)
-{
- const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint16x8_t res0 = vdupq_n_s16(1 << (fixed_point_position - 1));
- qint16x8_t res1 = res0;
-
- // Vector multiply-accumulate long
- res0 = vmlal_s8(res0, vget_low_s8(a), vget_low_s8(b));
- res1 = vmlal_s8(res1, vget_high_s8(a), vget_high_s8(b));
-
- // Shift right by fixed_point_position
- res0 = vqshlq_s16(res0, fixed_point_position_s16);
- res1 = vqshlq_s16(res1, fixed_point_position_s16);
-
- // Convert back to qint8 and saturate
- return vcombine_s8(vqmovn_s16(res0), vqmovn_s16(res1));
-}
-
-inline qint16x8_t vqmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position)
-{
- const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint32x4_t res0 = vdupq_n_s32(1 << (fixed_point_position - 1));
- qint32x4_t res1 = res0;
-
- // Vector multiply-accumulate long
- res0 = vmlal_s16(res0, vget_low_qs16(a), vget_low_qs16(b));
- res1 = vmlal_s16(res1, vget_high_qs16(a), vget_high_qs16(b));
-
- // Shift right by fixed_point_position
- res0 = vqshlq_s32(res0, fixed_point_position_s32);
- res1 = vqshlq_s32(res1, fixed_point_position_s32);
-
- // Convert back to qint16 and saturate
- return vcombine_s16(vqmovn_s32(res0), vqmovn_s32(res1));
-}
-
-inline qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position)
-{
- const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position);
-
- qint16x8_t res = vmull_s8(a, b);
-
- return vqrshlq_s16(res, fixed_point_position_s16);
-}
-
-inline qint32x4_t vmull_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position)
-{
- const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position);
-
- // Vector multiply long
- qint32x4_t tmp = vmull_s16(a, b);
-
- // Rounding saturating shift right by fixed_point_position, as in vmull_qs8
- return vqrshlq_s32(tmp, fixed_point_position_s32);
-}
-
-inline qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position)
-{
- const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1));
-
- // Vector multiply-accumulate long
- tmp = vmlal_s8(tmp, b, c);
-
- // Shift right by fixed_point_position
- tmp = vshlq_s16(tmp, fixed_point_position_s16);
-
- // Convert back to qint8 and accumulate
- return vadd_s8(a, vmovn_s16(tmp));
-}
-
-inline qint16x4_t vmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position)
-{
- const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint32x4_t tmp = vdupq_n_s32(1 << (fixed_point_position - 1));
-
- // Vector multiply-accumulate long
- tmp = vmlal_s16(tmp, b, c);
-
- // Shift right by fixed_point_position
- tmp = vshlq_s32(tmp, fixed_point_position_s32);
-
- // Convert back to qint16 and accumulate
- return vadd_s16(a, vmovn_s32(tmp));
-}
-
-inline qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position)
-{
- const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint16x8_t tmp0 = vdupq_n_s16(1 << (fixed_point_position - 1));
- qint16x8_t tmp1 = tmp0;
-
- // Vector multiply-accumulate long
- tmp0 = vmlal_s8(tmp0, vget_low_s8(b), vget_low_s8(c));
- tmp1 = vmlal_s8(tmp1, vget_high_s8(b), vget_high_s8(c));
-
- // Shift right by fixed_point_position
- tmp0 = vshlq_s16(tmp0, fixed_point_position_s16);
- tmp1 = vshlq_s16(tmp1, fixed_point_position_s16);
-
- // Convert back to qint8 and accumulate
- return vcombine_s8(vadd_s8(vget_low_s8(a), vmovn_s16(tmp0)), vadd_s8(vget_high_s8(a), vmovn_s16(tmp1)));
-}
-
-inline qint16x8_t vmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position)
-{
- const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint32x4_t tmp0 = vdupq_n_s32(1 << (fixed_point_position - 1));
- qint32x4_t tmp1 = tmp0;
-
- // Vector multiply-accumulate long
- tmp0 = vmlal_s16(tmp0, vget_low_qs16(b), vget_low_qs16(c));
- tmp1 = vmlal_s16(tmp1, vget_high_qs16(b), vget_high_qs16(c));
-
- // Shift right by fixed_point_position
- tmp0 = vshlq_s32(tmp0, fixed_point_position_s32);
- tmp1 = vshlq_s32(tmp1, fixed_point_position_s32);
-
- // Convert back to qint16 and accumulate
- return vcombine_s16(vadd_s16(vget_low_qs16(a), vmovn_s32(tmp0)), vadd_s16(vget_high_qs16(a), vmovn_s32(tmp1)));
-}
-
-inline qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position)
-{
- const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1));
-
- // Vector multiply-accumulate long
- tmp = vmlal_s8(tmp, b, c);
-
- // Shift right by fixed_point_position
- tmp = vqshlq_s16(tmp, fixed_point_position_s16);
-
- // Convert back to qint8 and accumulate
- return vqadd_s8(a, vqmovn_s16(tmp));
-}
-
-inline qint16x4_t vqmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position)
-{
- const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint32x4_t tmp = vdupq_n_s32(1 << (fixed_point_position - 1));
-
- // Vector multiply-accumulate long
- tmp = vmlal_s16(tmp, b, c);
-
- // Shift right by fixed_point_position
- tmp = vqshlq_s32(tmp, fixed_point_position_s32);
-
-    // Convert back to qint16 and accumulate
- return vqadd_s16(a, vqmovn_s32(tmp));
-}
-
-inline qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position)
-{
- const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint16x8_t tmp0 = vdupq_n_s16(1 << (fixed_point_position - 1));
- qint16x8_t tmp1 = tmp0;
-
- // Vector multiply-accumulate long
- tmp0 = vmlal_s8(tmp0, vget_low_s8(b), vget_low_s8(c));
- tmp1 = vmlal_s8(tmp1, vget_high_s8(b), vget_high_s8(c));
-
- // Shift right by fixed_point_position
- tmp0 = vqshlq_s16(tmp0, fixed_point_position_s16);
- tmp1 = vqshlq_s16(tmp1, fixed_point_position_s16);
-
- // Convert back to qint8 and accumulate
- qint8x16_t res = vcombine_s8(vqmovn_s16(tmp0), vqmovn_s16(tmp1));
- return vqaddq_s8(a, res);
-}
-
-inline qint16x8_t vqmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position)
-{
- const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint32x4_t tmp0 = vdupq_n_s32(1 << (fixed_point_position - 1));
- qint32x4_t tmp1 = tmp0;
-
- // Vector multiply-accumulate long
- tmp0 = vmlal_s16(tmp0, vget_low_qs16(b), vget_low_qs16(c));
- tmp1 = vmlal_s16(tmp1, vget_high_qs16(b), vget_high_qs16(c));
-
- // Shift right by fixed_point_position
- tmp0 = vqshlq_s32(tmp0, fixed_point_position_s32);
- tmp1 = vqshlq_s32(tmp1, fixed_point_position_s32);
-
- // Convert back to qint16 and accumulate
- qint16x8_t res = vcombine_s16(vqmovn_s32(tmp0), vqmovn_s32(tmp1));
- return vqaddq_s16(a, res);
-}
-
-inline qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position)
-{
- const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1));
-
- // Vector multiply-accumulate long
- tmp = vmlal_s8(tmp, b, c);
-
- // Shift right by fixed_point_position
- tmp = vshlq_s16(tmp, fixed_point_position_s16);
-
- // Accumulate
- return vaddq_s16(a, tmp);
-}
-
-inline qint32x4_t vmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position)
-{
- const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint32x4_t tmp = vdupq_n_s32(1 << (fixed_point_position - 1));
-
- // Vector multiply-accumulate long
- tmp = vmlal_s16(tmp, b, c);
-
- // Shift right by fixed_point_position
- tmp = vshlq_s32(tmp, fixed_point_position_s32);
-
- // Accumulate
- return vaddq_s32(a, tmp);
-}
-
-inline qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position)
-{
- const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1));
-
- // Vector multiply-accumulate long
- tmp = vmlal_s8(tmp, b, c);
-
- // Shift right by fixed_point_position
- tmp = vqshlq_s16(tmp, fixed_point_position_s16);
-
- // Accumulate
- return vqaddq_s16(a, tmp);
-}
-
-inline qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position)
-{
- const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position);
-
- // Initialize the temporary results with a constant used to round up the result
- qint32x4_t tmp = vdupq_n_s32(1 << (fixed_point_position - 1));
-
- // Vector multiply-accumulate long
- tmp = vmlal_s16(tmp, b, c);
-
- // Shift right by fixed_point_position
- tmp = vqshlq_s32(tmp, fixed_point_position_s32);
-
- // Accumulate
- return vqaddq_s32(a, tmp);
-}
-
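All of the multiply-accumulate variants above follow the same recipe: add the rounding constant to the widened product, shift right by the fixed-point position, then accumulate and narrow (with saturation in the vq* versions). A scalar sketch under those assumptions (scalar_qmla_qs8 is an illustrative name):

    #include <algorithm>
    #include <cstdint>

    static inline int8_t scalar_qmla_qs8(int8_t acc, int8_t b, int8_t c, int fp)
    {
        // Rounded Q-format product of b and c
        int32_t tmp = static_cast<int32_t>(b) * static_cast<int32_t>(c) + (1 << (fp - 1));
        tmp >>= fp;
        // Accumulate and saturate back to the 8-bit range
        tmp += acc;
        tmp = std::min<int32_t>(std::max<int32_t>(tmp, INT8_MIN), INT8_MAX);
        return static_cast<int8_t>(tmp);
    }
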
-inline qint8x8_t vqcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position)
-{
- const float32x4_t pow2 = vdupq_n_f32(static_cast<float>(1 << fixed_point_position));
-
- float32x4x2_t res_f32 =
- {
- {
- vbslq_f32(vcgeq_f32(a.val[0], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)),
- vbslq_f32(vcgeq_f32(a.val[1], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f))
- }
- };
-
- res_f32.val[0] = vmlaq_f32(res_f32.val[0], a.val[0], pow2);
- res_f32.val[1] = vmlaq_f32(res_f32.val[1], a.val[1], pow2);
-
- const int32x4x2_t res_s32 =
- {
- {
- vcvtq_s32_f32(res_f32.val[0]),
- vcvtq_s32_f32(res_f32.val[1]),
- }
- };
-
- const int16x8_t res_s16 = vcombine_s16(vqmovn_s32(res_s32.val[0]), vqmovn_s32(res_s32.val[1]));
-
- return vqmovn_s16(res_s16);
-}
-
-inline qint16x4_t vqcvt_qs16_f32(const float32x4_t a, int fixed_point_position)
-{
- const float32x4_t pow2 = vdupq_n_f32(static_cast<float>(1 << fixed_point_position));
-
- float32x4_t res_f32 = vbslq_f32(vcgeq_f32(a, vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f));
-
- res_f32 = vmlaq_f32(res_f32, a, pow2);
-
- const int32x4_t res_s32 = vcvtq_s32_f32(res_f32);
-
- return vqmovn_s32(res_s32);
-}
-
-inline qint8x16_t vqcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position)
-{
- const float32x4_t pow2 = vdupq_n_f32(static_cast<float>(1 << fixed_point_position));
-
- float32x4x4_t res_f32 =
- {
- {
- vbslq_f32(vcgeq_f32(a.val[0], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)),
- vbslq_f32(vcgeq_f32(a.val[1], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)),
- vbslq_f32(vcgeq_f32(a.val[2], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)),
- vbslq_f32(vcgeq_f32(a.val[3], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f))
- }
- };
-
- res_f32.val[0] = vmlaq_f32(res_f32.val[0], a.val[0], pow2);
- res_f32.val[1] = vmlaq_f32(res_f32.val[1], a.val[1], pow2);
- res_f32.val[2] = vmlaq_f32(res_f32.val[2], a.val[2], pow2);
- res_f32.val[3] = vmlaq_f32(res_f32.val[3], a.val[3], pow2);
-
- const int32x4x4_t res_s32 =
- {
- {
- vcvtq_s32_f32(res_f32.val[0]),
- vcvtq_s32_f32(res_f32.val[1]),
- vcvtq_s32_f32(res_f32.val[2]),
- vcvtq_s32_f32(res_f32.val[3]),
- }
- };
-
- const int16x8x2_t res_s16 =
- {
- {
- vcombine_s16(vqmovn_s32(res_s32.val[0]), vqmovn_s32(res_s32.val[1])),
- vcombine_s16(vqmovn_s32(res_s32.val[2]), vqmovn_s32(res_s32.val[3])),
- }
- };
-
- return vcombine_s8(vqmovn_s16(res_s16.val[0]), vqmovn_s16(res_s16.val[1]));
-}
-
-inline qint16x8_t vqcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position)
-{
- const float32x4_t pow2 = vdupq_n_f32(static_cast<float>(1 << fixed_point_position));
-
- float32x4x2_t res_f32 =
- {
- {
- vbslq_f32(vcgeq_f32(a.val[0], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)),
- vbslq_f32(vcgeq_f32(a.val[1], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f))
- }
- };
-
- res_f32.val[0] = vmlaq_f32(res_f32.val[0], a.val[0], pow2);
- res_f32.val[1] = vmlaq_f32(res_f32.val[1], a.val[1], pow2);
-
- const int32x4x2_t res_s32 =
- {
- {
- vcvtq_s32_f32(res_f32.val[0]),
- vcvtq_s32_f32(res_f32.val[1])
- }
- };
-
- return vcombine_s16(vqmovn_s32(res_s32.val[0]), vqmovn_s32(res_s32.val[1]));
-}
-
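The vqcvt* conversions above scale by 2^fp, round half away from zero by pre-biasing with +/-0.5 before truncation, and finally saturate. A scalar sketch (illustrative names only):

    #include <cstdint>

    static inline int8_t scalar_qcvt_qs8_f32(float a, int fp)
    {
        // Scale into the Q format and bias so that truncation rounds half away from zero
        const float scaled = a * static_cast<float>(1 << fp) + (a >= 0.0f ? 0.5f : -0.5f);
        int32_t     q      = static_cast<int32_t>(scaled);
        // Saturate to the 8-bit storage type
        if (q > INT8_MAX) { q = INT8_MAX; }
        if (q < INT8_MIN) { q = INT8_MIN; }
        return static_cast<int8_t>(q);
    }
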
-inline float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position)
-{
- const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position));
-
- const int16x8_t res_s16 = vmovl_s8(a);
-
- const int32x4x2_t res_s32 =
- {
- {
- vmovl_s16(vget_low_qs16(res_s16)),
- vmovl_s16(vget_high_qs16(res_s16))
- }
- };
-
- float32x4x2_t res_f32 =
- {
- {
- vcvtq_f32_s32(res_s32.val[0]),
- vcvtq_f32_s32(res_s32.val[1])
- }
- };
-
- res_f32.val[0] = vmulq_f32(res_f32.val[0], pow2);
- res_f32.val[1] = vmulq_f32(res_f32.val[1], pow2);
-
- return res_f32;
-}
-
-inline float32x4_t vcvt_f32_qs16(qint16x4_t a, int fixed_point_position)
-{
- const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position));
- const float32x4_t res_f32 = vcvtq_f32_s32(vmovl_s16(a));
-
- return vmulq_f32(res_f32, pow2);
-}
-
-inline float32x4x4_t vcvtq_f32_qs8(qint8x16_t a, int fixed_point_position)
-{
- const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position));
-
- const int16x8x2_t res_s16 =
- {
- {
- vmovl_s8(vget_low_s8(a)),
- vmovl_s8(vget_high_s8(a)),
- }
- };
-
- const int32x4x4_t res_s32 =
- {
- {
- vmovl_s16(vget_low_qs16(res_s16.val[0])),
- vmovl_s16(vget_high_qs16(res_s16.val[0])),
- vmovl_s16(vget_low_qs16(res_s16.val[1])),
- vmovl_s16(vget_high_qs16(res_s16.val[1])),
- }
- };
-
- float32x4x4_t res_f32 =
- {
- {
- vcvtq_f32_s32(res_s32.val[0]),
- vcvtq_f32_s32(res_s32.val[1]),
- vcvtq_f32_s32(res_s32.val[2]),
- vcvtq_f32_s32(res_s32.val[3])
- }
- };
-
- res_f32.val[0] = vmulq_f32(res_f32.val[0], pow2);
- res_f32.val[1] = vmulq_f32(res_f32.val[1], pow2);
- res_f32.val[2] = vmulq_f32(res_f32.val[2], pow2);
- res_f32.val[3] = vmulq_f32(res_f32.val[3], pow2);
-
- return res_f32;
-}
-
-inline float32x4x2_t vcvtq_f32_qs16(qint16x8_t a, int fixed_point_position)
-{
- const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position));
-
- const int32x4x2_t res_s32 =
- {
- {
- vmovl_s16(vget_low_qs16(a)),
- vmovl_s16(vget_high_qs16(a))
- }
- };
-
- float32x4x2_t res_f32 =
- {
- {
- vcvtq_f32_s32(res_s32.val[0]),
- vcvtq_f32_s32(res_s32.val[1])
- }
- };
-
- res_f32.val[0] = vmulq_f32(res_f32.val[0], pow2);
- res_f32.val[1] = vmulq_f32(res_f32.val[1], pow2);
-
- return res_f32;
-}
-
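Converting back to float is simply a division by 2^fp, which the kernels above express as a multiplication by the reciprocal scale; a scalar sketch:

    #include <cstdint>

    static inline float scalar_cvt_f32_qs16(int16_t q, int fp)
    {
        // Undo the Q-format scaling: value = raw / 2^fp
        return static_cast<float>(q) * (1.0f / static_cast<float>(1 << fp));
    }
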
-inline qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position)
-{
- // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0
- const qint8x8_t const_48_over_17 = vdup_n_s8(0x5A >> (5 - fixed_point_position)); // 2.823
- const qint8x8_t const_32_over_17 = vdup_n_s8((0x3C >> (5 - fixed_point_position))); // 1.8823
- const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position);
- const qint8x8_t const_two = vdup_n_s8(2 << fixed_point_position);
-
- // Find shift value
- const qint8x8_t shift_value = vneg_s8(vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))));
- const qint8x8_t temp = vshl_s8(a, shift_value);
-
- // Newton-Raphson division initial estimate X0 calculation
- qint8x8_t x = vsub_s8(const_48_over_17, vmul_qs8(temp, const_32_over_17, fixed_point_position));
-
- uint8x8_t set_one = vcgt_s8(x, const_one);
- x = vbsl_s8(set_one, const_one, x);
-
- // Use three iterations of Newton-Raphson method to get the result
- x = vmul_qs8(x, vsub_s8(const_two, vmul_qs8(temp, x, fixed_point_position)), fixed_point_position);
- x = vmul_qs8(x, vsub_s8(const_two, vmul_qs8(temp, x, fixed_point_position)), fixed_point_position);
- x = vmul_qs8(x, vsub_s8(const_two, vmul_qs8(temp, x, fixed_point_position)), fixed_point_position);
-
- return vshl_s8(x, shift_value);
-}
-
-inline qint16x4_t vrecip_qs16(qint16x4_t a, int fixed_point_position)
-{
- // We need two bits to store 2, thus we can only support formats from Q2.13 to Q15.0
- const qint16x4_t const_48_over_17 = vdup_n_s16(0x5A5A >> (13 - fixed_point_position)); // 2.823
- const qint16x4_t const_32_over_17 = vdup_n_s16(0x3C3C >> (13 - fixed_point_position)); // 1.8823
- const qint16x4_t const_one = vdup_n_s16(1 << fixed_point_position);
- const qint16x4_t const_two = vdup_n_s16(2 << fixed_point_position);
-
- // Find shift value
- const qint16x4_t shift_value = vneg_s16(vsub_s16(vdup_n_s16(8), vadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position))));
- const qint16x4_t temp = vshl_s16(a, shift_value);
-
- // Newton-Raphson division initial estimate X0 calculation
- qint16x4_t x = vsub_s16(const_48_over_17, vmul_qs16(temp, const_32_over_17, fixed_point_position));
-
- uint16x4_t set_one = vcgt_s16(x, const_one);
- x = vbsl_s16(set_one, const_one, x);
-
- // Use four iterations of Newton-Raphson method to get the result
- x = vmul_qs16(x, vsub_s16(const_two, vmul_qs16(temp, x, fixed_point_position)), fixed_point_position);
- x = vmul_qs16(x, vsub_s16(const_two, vmul_qs16(temp, x, fixed_point_position)), fixed_point_position);
- x = vmul_qs16(x, vsub_s16(const_two, vmul_qs16(temp, x, fixed_point_position)), fixed_point_position);
- x = vmul_qs16(x, vsub_s16(const_two, vmul_qs16(temp, x, fixed_point_position)), fixed_point_position);
-
- return vshl_s16(x, shift_value);
-}
-
-inline qint8x8_t vqrecip_qs8(qint8x8_t a, int fixed_point_position)
-{
- // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0
- const qint8x8_t const_48_over_17 = vdup_n_s8(0x5A >> (5 - fixed_point_position)); // 2.823
- const qint8x8_t const_32_over_17 = vdup_n_s8((0x3C >> (5 - fixed_point_position))); // 1.8823
- const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position);
- const qint8x8_t const_two = vdup_n_s8(2 << fixed_point_position);
-
- // Find shift value
- const qint8x8_t shift_value = vqneg_s8(vqsub_s8(vdup_n_s8(8), vqadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))));
- const qint8x8_t temp = vqshl_s8(a, shift_value);
-
- // Newton-Raphson division initial estimate X0 calculation
- qint8x8_t x = vqsub_s8(const_48_over_17, vqmul_qs8(temp, const_32_over_17, fixed_point_position));
-
- uint8x8_t set_one = vcgt_s8(x, const_one);
- x = vbsl_s8(set_one, const_one, x);
-
- // Use three iterations of Newton-Raphson method to get the result
- x = vqmul_qs8(x, vqsub_s8(const_two, vqmul_qs8(temp, x, fixed_point_position)), fixed_point_position);
- x = vqmul_qs8(x, vqsub_s8(const_two, vqmul_qs8(temp, x, fixed_point_position)), fixed_point_position);
- x = vqmul_qs8(x, vqsub_s8(const_two, vqmul_qs8(temp, x, fixed_point_position)), fixed_point_position);
-
- return vqshl_s8(x, shift_value);
-}
-
-inline qint16x4_t vqrecip_qs16(qint16x4_t a, int fixed_point_position)
-{
- // We need two bits to store 2, thus we can only support formats from Q2.13 to Q15.0
- const qint16x4_t const_48_over_17 = vdup_n_s16(0x5A5A >> (13 - fixed_point_position)); // 2.823
- const qint16x4_t const_32_over_17 = vdup_n_s16(0x3C3C >> (13 - fixed_point_position)); // 1.8823
- const qint16x4_t const_one = vdup_n_s16(1 << fixed_point_position);
- const qint16x4_t const_two = vdup_n_s16(2 << fixed_point_position);
-
- // Find shift value
- const qint16x4_t shift_value = vqneg_s16(vqsub_s16(vdup_n_s16(8), vqadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position))));
- const qint16x4_t temp = vqshl_s16(a, shift_value);
-
- // Newton-Raphson division initial estimate X0 calculation
- qint16x4_t x = vqsub_s16(const_48_over_17, vqmul_qs16(temp, const_32_over_17, fixed_point_position));
-
- uint16x4_t set_one = vcgt_s16(x, const_one);
- x = vbsl_s16(set_one, const_one, x);
-
- // Use four iterations of Newton-Raphson method to get the result
- x = vqmul_qs16(x, vqsub_s16(const_two, vqmul_qs16(temp, x, fixed_point_position)), fixed_point_position);
- x = vqmul_qs16(x, vqsub_s16(const_two, vqmul_qs16(temp, x, fixed_point_position)), fixed_point_position);
- x = vqmul_qs16(x, vqsub_s16(const_two, vqmul_qs16(temp, x, fixed_point_position)), fixed_point_position);
- x = vqmul_qs16(x, vqsub_s16(const_two, vqmul_qs16(temp, x, fixed_point_position)), fixed_point_position);
-
- return vqshl_s16(x, shift_value);
-}
-
-inline qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position)
-{
- // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0
- const qint8x16_t const_48_over_17 = vdupq_n_s8(0x5A >> (5 - fixed_point_position)); // 2.823
-    const qint8x16_t const_32_over_17 = vdupq_n_s8((0x3C >> (5 - fixed_point_position))); // 1.8823
- const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position);
- const qint8x16_t const_two = vdupq_n_s8(2 << fixed_point_position);
-
- // Find shift value
- const qint8x16_t shift_value = vnegq_s8(vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))));
- const qint8x16_t temp = vshlq_s8(a, shift_value);
-
- // Newton-Raphson division initial estimate X0 calculation
- qint8x16_t x = vsubq_qs8(const_48_over_17, vmulq_qs8(temp, const_32_over_17, fixed_point_position));
-
- // Set initial guess to one if x > 1
- uint8x16_t set_one = vcgtq_s8(x, const_one);
- x = vbslq_s8(set_one, const_one, x);
-
- // Use three iterations of Newton-Raphson method to get the result
- x = vmulq_qs8(x, vsubq_s8(const_two, vmulq_qs8(temp, x, fixed_point_position)), fixed_point_position);
- x = vmulq_qs8(x, vsubq_s8(const_two, vmulq_qs8(temp, x, fixed_point_position)), fixed_point_position);
- x = vmulq_qs8(x, vsubq_s8(const_two, vmulq_qs8(temp, x, fixed_point_position)), fixed_point_position);
-
- return vshlq_s8(x, shift_value);
-}
-
-inline qint16x8_t vrecipq_qs16(qint16x8_t a, int fixed_point_position)
-{
- // We need two bits to store 2, thus we can only support formats from Q2.13 to Q15.0
- const qint16x8_t const_48_over_17 = vdupq_n_s16(0x5A56 >> (13 - fixed_point_position)); // 2.823
- const qint16x8_t const_32_over_17 = vdupq_n_s16(0x3C3C >> (13 - fixed_point_position)); // 1.8823
- const qint16x8_t const_one = vdupq_n_s16(1 << fixed_point_position);
- const qint16x8_t const_two = vdupq_n_s16(2 << fixed_point_position);
-
- // Find shift value
- const qint16x8_t shift_value = vnegq_s16(vsubq_s16(vdupq_n_s16(16), vaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position))));
- const qint16x8_t temp = vshlq_s16(a, shift_value);
-
- // Newton-Raphson division initial estimate X0 calculation
- qint16x8_t x = vsubq_qs16(const_48_over_17, vmulq_qs16(temp, const_32_over_17, fixed_point_position));
-
- // Set initial guess to one if x > 1
- uint16x8_t set_one = vcgtq_s16(x, const_one);
- x = vbslq_s16(set_one, const_one, x);
-
- // Use four iterations of Newton-Raphson method to get the result
- x = vmulq_qs16(x, vsubq_s16(const_two, vmulq_qs16(temp, x, fixed_point_position)), fixed_point_position);
- x = vmulq_qs16(x, vsubq_s16(const_two, vmulq_qs16(temp, x, fixed_point_position)), fixed_point_position);
- x = vmulq_qs16(x, vsubq_s16(const_two, vmulq_qs16(temp, x, fixed_point_position)), fixed_point_position);
- x = vmulq_qs16(x, vsubq_s16(const_two, vmulq_qs16(temp, x, fixed_point_position)), fixed_point_position);
-
- return vshlq_s16(x, shift_value);
-}
-
-inline qint8x16_t vqrecipq_qs8(qint8x16_t a, int fixed_point_position)
-{
- // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0
- const qint8x16_t const_48_over_17 = vdupq_n_s8(0x5A >> (5 - fixed_point_position)); // 2.823
-    const qint8x16_t const_32_over_17 = vdupq_n_s8((0x3C >> (5 - fixed_point_position))); // 1.8823
- const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position);
- const qint8x16_t const_two = vdupq_n_s8(2 << fixed_point_position);
-
- // Find shift value
- const qint8x16_t shift_value = vqnegq_s8(vqsubq_s8(vdupq_n_s8(8), vqaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))));
- const qint8x16_t temp = vqshlq_s8(a, shift_value);
-
- // Newton-Raphson division initial estimate X0 calculation
- qint8x16_t x = vqsubq_qs8(const_48_over_17, vqmulq_qs8(temp, const_32_over_17, fixed_point_position));
-
- // Set initial guess to one if x > 1
- uint8x16_t set_one = vcgtq_s8(x, const_one);
- x = vbslq_s8(set_one, const_one, x);
-
- // Use three iterations of Newton-Raphson method to get the result
- x = vqmulq_qs8(x, vqsubq_s8(const_two, vqmulq_qs8(temp, x, fixed_point_position)), fixed_point_position);
- x = vqmulq_qs8(x, vqsubq_s8(const_two, vqmulq_qs8(temp, x, fixed_point_position)), fixed_point_position);
- x = vqmulq_qs8(x, vqsubq_s8(const_two, vqmulq_qs8(temp, x, fixed_point_position)), fixed_point_position);
-
- return vqshlq_s8(x, shift_value);
-}
-
-inline qint16x8_t vqrecipq_qs16(qint16x8_t a, int fixed_point_position)
-{
- // We need two bits to store 2, thus we can only support formats from Q2.13 to Q15.0
- const qint16x8_t const_48_over_17 = vdupq_n_s16(0x5A56 >> (13 - fixed_point_position)); // 2.823
- const qint16x8_t const_32_over_17 = vdupq_n_s16(0x3C3C >> (13 - fixed_point_position)); // 1.8823
- const qint16x8_t const_one = vdupq_n_s16(1 << fixed_point_position);
- const qint16x8_t const_two = vdupq_n_s16(2 << fixed_point_position);
-
- // Find shift value
- const qint16x8_t shift_value = vqnegq_s16(vqsubq_s16(vdupq_n_s16(16), vqaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position))));
- const qint16x8_t temp = vqshlq_s16(a, shift_value);
-
- // Newton-Raphson division initial estimate X0 calculation
- qint16x8_t x = vqsubq_qs16(const_48_over_17, vqmulq_qs16(temp, const_32_over_17, fixed_point_position));
-
- // Set initial guess to one if x > 1
- uint16x8_t set_one = vcgtq_s16(x, const_one);
- x = vbslq_s16(set_one, const_one, x);
-
- // Use four iterations of Newton-Raphson method to get the result
- x = vqmulq_qs16(x, vqsubq_s16(const_two, vqmulq_qs16(temp, x, fixed_point_position)), fixed_point_position);
- x = vqmulq_qs16(x, vqsubq_s16(const_two, vqmulq_qs16(temp, x, fixed_point_position)), fixed_point_position);
- x = vqmulq_qs16(x, vqsubq_s16(const_two, vqmulq_qs16(temp, x, fixed_point_position)), fixed_point_position);
- x = vqmulq_qs16(x, vqsubq_s16(const_two, vqmulq_qs16(temp, x, fixed_point_position)), fixed_point_position);
-
- // Saturate result in case of overflow
- return vbslq_s16(vceqq_s16(a, vdupq_n_s16(0)), vdupq_n_s16(std::numeric_limits<int16_t>::max()), vqshlq_s16(x, shift_value));
-}
-
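All of the reciprocal variants above use the same Newton-Raphson scheme: scale the operand towards one with a shift derived from its leading-zero count, seed with x0 = 48/17 - 32/17 * a, iterate x <- x * (2 - a * x), then undo the shift; the 8-bit kernels run three iterations and the 16-bit kernels four. A floating-point sketch of the core iteration (nr_reciprocal is an illustrative name, assuming the operand has already been normalised):

    static inline float nr_reciprocal(float a_normalised, int iterations)
    {
        // Initial estimate for an operand close to one
        float x = 48.0f / 17.0f - (32.0f / 17.0f) * a_normalised;
        for (int i = 0; i < iterations; ++i)
        {
            // Newton-Raphson step for f(x) = 1/x - a
            x = x * (2.0f - a_normalised * x);
        }
        return x;
    }
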
-inline qint8x8_t vdiv_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position)
-{
- return vmul_qs8(a, vrecip_qs8(b, fixed_point_position), fixed_point_position);
-}
-
-inline qint16x4_t vdiv_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position)
-{
- return vmul_qs16(a, vrecip_qs16(b, fixed_point_position), fixed_point_position);
-}
-
-inline qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position)
-{
- return vmulq_qs8(a, vrecipq_qs8(b, fixed_point_position), fixed_point_position);
-}
-
-inline qint16x8_t vdivq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position)
-{
- return vmulq_qs16(a, vrecipq_qs16(b, fixed_point_position), fixed_point_position);
-}
-
-template <bool islog>
-inline qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position)
-{
- const qint8x8_t shift_value = vdup_n_s8(-(7 - fixed_point_position));
- const qint8x8_t const_one = vdup_n_s8(1);
- const qint8x8_t A = vrshl_s8(islog ? log_tab_qs8[0] : exp_tab_qs8[0], islog ? vadd_s8(shift_value, const_one) : shift_value);
- const qint8x8_t B = vrshl_s8(islog ? log_tab_qs8[1] : exp_tab_qs8[1], shift_value);
- const qint8x8_t C = vrshl_s8(islog ? log_tab_qs8[2] : exp_tab_qs8[2], shift_value);
- const qint8x8_t D = vrshl_s8(islog ? log_tab_qs8[3] : exp_tab_qs8[3], shift_value);
- const qint8x8_t x1 = vadd_s8(vmul_qs8(a, D, fixed_point_position), C);
- const qint8x8_t x2 = vadd_s8(vmul_qs8(a, x1, fixed_point_position), B);
- const qint8x8_t x3 = vadd_s8(vmul_qs8(a, x2, fixed_point_position), A);
- const qint8x8_t res = vmul_qs8(a, x3, fixed_point_position);
- return res;
-}
-
-template <bool islog>
-inline qint16x4_t vtaylor_poly_qs16(qint16x4_t a, int fixed_point_position)
-{
- const qint16x4_t shift_value = vdup_n_s16(-(15 - fixed_point_position));
- const qint16x4_t const_one = vdup_n_s16(1);
- const qint16x4_t A = vrshl_s16(islog ? log_tab_qs16[0] : exp_tab_qs16[0], islog ? vadd_s16(shift_value, const_one) : shift_value);
- const qint16x4_t B = vrshl_s16(islog ? log_tab_qs16[1] : exp_tab_qs16[1], shift_value);
- const qint16x4_t C = vrshl_s16(islog ? log_tab_qs16[2] : exp_tab_qs16[2], shift_value);
- const qint16x4_t D = vrshl_s16(islog ? log_tab_qs16[3] : exp_tab_qs16[3], shift_value);
- const qint16x4_t x1 = vadd_s16(vmul_qs16(a, D, fixed_point_position), C);
- const qint16x4_t x2 = vadd_s16(vmul_qs16(a, x1, fixed_point_position), B);
- const qint16x4_t x3 = vadd_s16(vmul_qs16(a, x2, fixed_point_position), A);
- const qint16x4_t res = vmul_qs16(a, x3, fixed_point_position);
- return res;
-}
-
-template <bool islog>
-inline qint8x8_t vqtaylor_poly_qs8(qint8x8_t a, int fixed_point_position)
-{
- const qint8x8_t shift_value = vdup_n_s8(-(7 - fixed_point_position));
- const qint8x8_t const_one = vdup_n_s8(1);
- const qint8x8_t A = vqrshl_s8(islog ? log_tab_qs8[0] : exp_tab_qs8[0], islog ? vqadd_s8(shift_value, const_one) : shift_value);
- const qint8x8_t B = vqrshl_s8(islog ? log_tab_qs8[1] : exp_tab_qs8[1], shift_value);
- const qint8x8_t C = vqrshl_s8(islog ? log_tab_qs8[2] : exp_tab_qs8[2], shift_value);
- const qint8x8_t D = vqrshl_s8(islog ? log_tab_qs8[3] : exp_tab_qs8[3], shift_value);
- const qint8x8_t x1 = vqadd_s8(vqmul_qs8(a, D, fixed_point_position), C);
- const qint8x8_t x2 = vqadd_s8(vqmul_qs8(a, x1, fixed_point_position), B);
- const qint8x8_t x3 = vqadd_s8(vqmul_qs8(a, x2, fixed_point_position), A);
- const qint8x8_t res = vqmul_qs8(a, x3, fixed_point_position);
- return res;
-}
-
-template <bool islog>
-inline qint16x4_t vqtaylor_poly_qs16(qint16x4_t a, int fixed_point_position)
-{
- const qint16x4_t shift_value = vdup_n_s16(-(15 - fixed_point_position));
- const qint16x4_t const_one = vdup_n_s16(1);
- const qint16x4_t A = vqrshl_s16(islog ? log_tab_qs16[0] : exp_tab_qs16[0], islog ? vqadd_s16(shift_value, const_one) : shift_value);
- const qint16x4_t B = vqrshl_s16(islog ? log_tab_qs16[1] : exp_tab_qs16[1], shift_value);
- const qint16x4_t C = vqrshl_s16(islog ? log_tab_qs16[2] : exp_tab_qs16[2], shift_value);
- const qint16x4_t D = vqrshl_s16(islog ? log_tab_qs16[3] : exp_tab_qs16[3], shift_value);
- const qint16x4_t x1 = vqadd_s16(vqmul_qs16(a, D, fixed_point_position), C);
- const qint16x4_t x2 = vqadd_s16(vqmul_qs16(a, x1, fixed_point_position), B);
- const qint16x4_t x3 = vqadd_s16(vqmul_qs16(a, x2, fixed_point_position), A);
- const qint16x4_t res = vqmul_qs16(a, x3, fixed_point_position);
- return res;
-}
-
-template <bool islog>
-inline qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position)
-{
- const qint8x16_t shift_value = vdupq_n_s8(-(7 - fixed_point_position));
- const qint8x16_t const_one = vdupq_n_s8(1);
- const qint8x16_t A = vrshlq_s8(islog ? log_tabq_qs8[0] : exp_tabq_qs8[0], islog ? vaddq_s8(shift_value, const_one) : shift_value);
- const qint8x16_t B = vrshlq_s8(islog ? log_tabq_qs8[1] : exp_tabq_qs8[1], shift_value);
- const qint8x16_t C = vrshlq_s8(islog ? log_tabq_qs8[2] : exp_tabq_qs8[2], shift_value);
- const qint8x16_t D = vrshlq_s8(islog ? log_tabq_qs8[3] : exp_tabq_qs8[3], shift_value);
- const qint8x16_t x1 = vaddq_s8(vmulq_qs8(a, D, fixed_point_position), C);
- const qint8x16_t x2 = vaddq_s8(vmulq_qs8(a, x1, fixed_point_position), B);
- const qint8x16_t x3 = vaddq_s8(vmulq_qs8(a, x2, fixed_point_position), A);
- const qint8x16_t res = vmulq_qs8(a, x3, fixed_point_position);
- return res;
-}
-
-template <bool islog>
-inline qint16x8_t vtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position)
-{
- const qint16x8_t shift_value = vdupq_n_s16(-(15 - fixed_point_position));
- const qint16x8_t const_one = vdupq_n_s16(1);
- const qint16x8_t A = vrshlq_s16(islog ? log_tabq_qs16[0] : exp_tabq_qs16[0], islog ? vaddq_s16(shift_value, const_one) : shift_value);
- const qint16x8_t B = vrshlq_s16(islog ? log_tabq_qs16[1] : exp_tabq_qs16[1], shift_value);
- const qint16x8_t C = vrshlq_s16(islog ? log_tabq_qs16[2] : exp_tabq_qs16[2], shift_value);
- const qint16x8_t D = vrshlq_s16(islog ? log_tabq_qs16[3] : exp_tabq_qs16[3], shift_value);
- const qint16x8_t x1 = vaddq_s16(vmulq_qs16(a, D, fixed_point_position), C);
- const qint16x8_t x2 = vaddq_s16(vmulq_qs16(a, x1, fixed_point_position), B);
- const qint16x8_t x3 = vaddq_s16(vmulq_qs16(a, x2, fixed_point_position), A);
- const qint16x8_t res = vmulq_qs16(a, x3, fixed_point_position);
- return res;
-}
-
-template <bool islog>
-inline qint8x16_t vqtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position)
-{
- const qint8x16_t shift_value = vdupq_n_s8(-(7 - fixed_point_position));
- const qint8x16_t const_one = vdupq_n_s8(1);
- const qint8x16_t A = vqrshlq_s8(islog ? log_tabq_qs8[0] : exp_tabq_qs8[0], islog ? vqaddq_s8(shift_value, const_one) : shift_value);
- const qint8x16_t B = vqrshlq_s8(islog ? log_tabq_qs8[1] : exp_tabq_qs8[1], shift_value);
- const qint8x16_t C = vqrshlq_s8(islog ? log_tabq_qs8[2] : exp_tabq_qs8[2], shift_value);
- const qint8x16_t D = vqrshlq_s8(islog ? log_tabq_qs8[3] : exp_tabq_qs8[3], shift_value);
- const qint8x16_t x1 = vqaddq_s8(vqmulq_qs8(a, D, fixed_point_position), C);
- const qint8x16_t x2 = vqaddq_s8(vqmulq_qs8(a, x1, fixed_point_position), B);
- const qint8x16_t x3 = vqaddq_s8(vqmulq_qs8(a, x2, fixed_point_position), A);
- const qint8x16_t res = vqmulq_qs8(a, x3, fixed_point_position);
- return res;
-}
-
-template <bool islog>
-inline qint16x8_t vqtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position)
-{
- const qint16x8_t shift_value = vdupq_n_s16(-(15 - fixed_point_position));
- const qint16x8_t const_one = vdupq_n_s16(1);
- const qint16x8_t A = vqrshlq_s16(islog ? log_tabq_qs16[0] : exp_tabq_qs16[0], islog ? vqaddq_s16(shift_value, const_one) : shift_value);
- const qint16x8_t B = vqrshlq_s16(islog ? log_tabq_qs16[1] : exp_tabq_qs16[1], shift_value);
- const qint16x8_t C = vqrshlq_s16(islog ? log_tabq_qs16[2] : exp_tabq_qs16[2], shift_value);
- const qint16x8_t D = vqrshlq_s16(islog ? log_tabq_qs16[3] : exp_tabq_qs16[3], shift_value);
- const qint16x8_t x1 = vqaddq_s16(vqmulq_qs16(a, D, fixed_point_position), C);
- const qint16x8_t x2 = vqaddq_s16(vqmulq_qs16(a, x1, fixed_point_position), B);
- const qint16x8_t x3 = vqaddq_s16(vqmulq_qs16(a, x2, fixed_point_position), A);
- const qint16x8_t res = vqmulq_qs16(a, x3, fixed_point_position);
- return res;
-}
-
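Each vtaylor_poly* variant evaluates the degree-4 polynomial A*a + B*a^2 + C*a^3 + D*a^4 by Horner's scheme, with A..D read from the exp/log coefficient tables and rescaled to the current Q format. The equivalent scalar evaluation (float sketch, illustrative names):

    static inline float horner_poly4(float a, float A, float B, float C, float D)
    {
        float x = a * D + C; // innermost Horner step
        x = a * x + B;
        x = a * x + A;
        return a * x;        // final multiply by a: the constant term is zero
    }
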
-inline qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position)
-{
- const qint8x8_t shift_value = vdup_n_s8(fixed_point_position - 7);
- const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position);
- const qint8x8_t const_ln2 = vqrshl_s8(vdup_n_s8(0x58), shift_value); // ln(2)
- const qint8x8_t const_inv_ln2 = vorr_s8(vqrshl_s8(vdup_n_s8(0x38), shift_value), const_one); // 1/ln(2)
-
- // Perform range reduction [-log(2),log(2)]
- const qint8x8_t m = vqmul_qs8(a, const_inv_ln2, fixed_point_position); // x / ln(2)
-
- // get decimal part from m
- const qint8x8_t dec_m = vqshl_s8(m, vdup_n_s8(-fixed_point_position));
-
- qint8x8_t alpha = vqmul_qs8(vqshl_s8(dec_m, vdup_n_s8(fixed_point_position)), const_ln2, fixed_point_position);
- alpha = vqabs_qs8(vqsub_s8(a, alpha));
-
- // Polynomial Approximation
- qint8x8_t poly = vqtaylor_poly_qs8<false>(alpha, fixed_point_position);
- poly = vqadd_s8(poly, const_one);
-
- // Reconstruct
- poly = vqshl_s8(poly, dec_m);
-
- return poly;
-}
-
-inline qint16x4_t vqexp_qs16(qint16x4_t a, int fixed_point_position)
-{
- const qint16x4_t shift_value = vdup_n_s16(fixed_point_position - 15);
- const qint16x4_t const_one = vdup_n_s16(1 << fixed_point_position);
- const qint16x4_t const_ln2 = vqrshl_s16(vdup_n_s16(0x58B9), shift_value); // ln(2)
- const qint16x4_t const_inv_ln2 = vorr_s16(vqrshl_s16(vdup_n_s16(0x38AA), shift_value), const_one); // 1/ln(2)
-
- // Perform range reduction [-log(2),log(2)]
- const qint16x4_t m = vqmul_qs16(a, const_inv_ln2, fixed_point_position); // x / ln(2)
-
- // get decimal part from m
- const qint16x4_t dec_m = vqshl_s16(m, vdup_n_s16(-fixed_point_position));
-
- qint16x4_t alpha = vqmul_qs16(vqshl_s16(dec_m, vdup_n_s16(fixed_point_position)), const_ln2, fixed_point_position);
- alpha = vqabs_qs16(vqsub_s16(a, alpha));
-
- // Polynomial Approximation
- qint16x4_t poly = vqtaylor_poly_qs16<false>(alpha, fixed_point_position);
- poly = vqadd_s16(poly, const_one);
-
- // Reconstruct
- poly = vqshl_s16(poly, dec_m);
-
- return poly;
-}
-
-inline qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position)
-{
- const qint8x16_t shift_value = vdupq_n_s8(fixed_point_position - 7);
- const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position);
- const qint8x16_t const_ln2 = vqrshlq_s8(vdupq_n_s8(0x58), shift_value); // ln(2)
- const qint8x16_t const_inv_ln2 = vorrq_s8(vqrshlq_s8(vdupq_n_s8(0x38), shift_value), const_one); // 1/ln(2)
-
- // Perform range reduction [-log(2),log(2)]
- const qint8x16_t m = vqmulq_qs8(a, const_inv_ln2, fixed_point_position); // x / ln(2)
-
- // get decimal part from m
- const qint8x16_t dec_m = vqshlq_s8(m, vdupq_n_s8(-fixed_point_position));
-
- qint8x16_t alpha = vqmulq_qs8(vqshlq_s8(dec_m, vdupq_n_s8(fixed_point_position)), const_ln2, fixed_point_position);
- alpha = vqabsq_qs8(vqsubq_qs8(a, alpha));
-
- // Polynomial Approximation
- qint8x16_t poly = vqtaylor_polyq_qs8<false>(alpha, fixed_point_position);
- poly = vqaddq_s8(poly, const_one);
-
- // Reconstruct
- poly = vqshlq_s8(poly, dec_m);
-
- return poly;
-}
-
-inline qint16x8_t vqexpq_qs16(qint16x8_t a, int fixed_point_position)
-{
- const qint16x8_t shift_value = vdupq_n_s16(fixed_point_position - 15);
- const qint16x8_t const_one = vdupq_n_s16(1 << fixed_point_position);
- const qint16x8_t const_ln2 = vqrshlq_s16(vdupq_n_s16(0x58B9), shift_value); // ln(2)
- const qint16x8_t const_inv_ln2 = vorrq_s16(vqrshlq_s16(vdupq_n_s16(0x38AA), shift_value), const_one); // 1/ln(2)
-
- // Perform range reduction [-log(2),log(2)]
- const qint16x8_t m = vqmulq_qs16(a, const_inv_ln2, fixed_point_position); // x / ln(2)
-
- // get decimal part from m
- const qint16x8_t dec_m = vqshlq_s16(m, vdupq_n_s16(-fixed_point_position));
-
- qint16x8_t alpha = vqmulq_qs16(vqshlq_s16(dec_m, vdupq_n_s16(fixed_point_position)), const_ln2, fixed_point_position);
- alpha = vqabsq_qs16(vqsubq_qs16(a, alpha));
-
- // Polynomial Approximation
- qint16x8_t poly = vqtaylor_polyq_qs16<false>(alpha, fixed_point_position);
- poly = vqaddq_s16(poly, const_one);
-
- // Reconstruct
- poly = vqshlq_s16(poly, dec_m);
-
- return poly;
-}
-
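The exp kernels above rely on the range reduction a = m*ln(2) + r with integer m, so that exp(a) = 2^m * exp(r) and only exp(r) needs the polynomial. A floating-point sketch of the same decomposition (the truncated series here merely stands in for the fixed-point polynomial; names are illustrative):

    #include <cmath>

    static inline float exp_by_range_reduction(float a)
    {
        const float ln2 = std::log(2.0f);
        const int   m   = static_cast<int>(std::floor(a / ln2));      // integer multiple of ln(2)
        const float r   = a - static_cast<float>(m) * ln2;            // remainder in [0, ln(2))
        const float er  = 1.0f + r + 0.5f * r * r + r * r * r / 6.0f; // small-range approximation of exp(r)
        return std::ldexp(er, m);                                     // scale by 2^m
    }
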
-inline qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position)
-{
- const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position);
- const qint8x8_t const_seven_dec = vdup_n_s8(7);
- const qint8x8_t const_ln2 = vdup_n_s8(0x58 >> (7 - fixed_point_position)); // ln(2)
-
- // If 0 < a < 1, calculate log(1/x)
- uint8x8_t calc_reciprocal = vclt_s8(a, const_one);
- qint8x8_t recip = vdup_n_s8(0);
- recip = vbsl_s8(calc_reciprocal, recip, a);
-
- // Calculate reciprocal
- recip = vrecip_qs8(recip, fixed_point_position);
- a = vbsl_s8(calc_reciprocal, recip, a);
-
- // Get decimal part of a
- qint8x8_t shift_value = vdup_n_s8(-fixed_point_position);
- qint8x8_t dec_a = vshl_s8(a, shift_value); // a >> fixed_point_position
-
- // Get exponent of 2^n which is equal or less than dec_a
- shift_value = vsub_s8(const_seven_dec, vclz_s8(dec_a));
-
- // Get x to range (1, 2]
- const qint8x8_t shift_value_neg = vneg_s8(shift_value);
- const qint8x8_t temp = vsub_s8(vrshl_s8(a, shift_value_neg), const_one);
- const qint8x8_t sum = vmul_s8(shift_value, const_one);
-
- // Polynomial Approximation
- qint8x8_t poly = vtaylor_poly_qs8<true>(temp, fixed_point_position);
-
- // Reconstruct
- poly = vmul_qs8(vadd_s8(poly, sum), const_ln2, fixed_point_position);
-
- // Set negative value for 0 < a < 1
- poly = vbsl_s8(calc_reciprocal, vneg_s8(poly), poly);
-
- return poly;
-}
-
-inline qint16x4_t vlog_qs16(qint16x4_t a, int fixed_point_position)
-{
- const qint16x4_t const_one = vdup_n_s16(1 << fixed_point_position);
- const qint16x4_t const_fifteen_dec = vdup_n_s16(15);
- const qint16x4_t const_ln2 = vdup_n_s16(0x58B9 >> (15 - fixed_point_position)); // ln(2)
-
- // If 0 < a < 1, calculate log(1/x)
- uint16x4_t calc_reciprocal = vclt_s16(a, const_one);
- qint16x4_t recip = vdup_n_s16(0);
- recip = vbsl_s16(calc_reciprocal, recip, a);
-
- // Calculate reciprocal
- recip = vrecip_qs16(recip, fixed_point_position);
- a = vbsl_s16(calc_reciprocal, recip, a);
-
- // Get decimal part of a
- qint16x4_t shift_value = vdup_n_s16(-fixed_point_position);
- qint16x4_t dec_a = vshl_s16(a, shift_value); // a >> fixed_point_position
-
- // Get exponent of 2^n which is equal or less than dec_a
- shift_value = vsub_s16(const_fifteen_dec, vclz_s16(dec_a));
-
- // Get x to range (1, 2]
- const qint16x4_t shift_value_neg = vneg_s16(shift_value);
- const qint16x4_t temp = vsub_s16(vrshl_s16(a, shift_value_neg), const_one);
- const qint16x4_t sum = vmul_s16(shift_value, const_one);
-
- // Polynomial Approximation
- qint16x4_t poly = vtaylor_poly_qs16<true>(temp, fixed_point_position);
-
- // Reconstruct
- poly = vmul_qs16(vadd_s16(poly, sum), const_ln2, fixed_point_position);
-
- // Set negative value for 0 < a < 1
- poly = vbsl_s16(calc_reciprocal, vneg_s16(poly), poly);
-
- return poly;
-}
-
-inline qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position)
-{
- const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position);
- const qint8x16_t const_seven_dec = vdupq_n_s8(7);
- const qint8x16_t const_ln2 = vdupq_n_s8(0x58 >> (7 - fixed_point_position)); // ln(2)
-
- // If 0 < a < 1, calculate log(1/x)
- uint8x16_t calc_reciprocal = vcltq_s8(a, const_one);
- qint8x16_t recip = vdupq_n_s8(0);
- recip = vbslq_s8(calc_reciprocal, a, recip);
-
- // Calculate reciprocal
- recip = vrecipq_qs8(recip, fixed_point_position);
- a = vbslq_s8(calc_reciprocal, recip, a);
-
- // Get decimal part of a
- qint8x16_t shift_value = vdupq_n_s8(-fixed_point_position);
- qint8x16_t dec_a = vshlq_s8(a, shift_value); // a >> fixed_point_position
-
- // Get exponent of 2^n which is equal or less than dec_a
- shift_value = vsubq_s8(const_seven_dec, vclzq_s8(dec_a));
-
- // Get x to range (1, 2]
- const qint8x16_t shift_value_neg = vnegq_s8(shift_value);
- const qint8x16_t temp = vsubq_s8(vrshlq_s8(a, shift_value_neg), const_one);
- const qint8x16_t sum = vmulq_s8(shift_value, const_one);
-
- // Polynomial Approximation
- qint8x16_t poly = vtaylor_polyq_qs8<true>(temp, fixed_point_position);
-
- // Reconstruct
- poly = vmulq_qs8(vaddq_s8(poly, sum), const_ln2, fixed_point_position);
-
- // Set negative value for 0 < a < 1
- poly = vbslq_s8(calc_reciprocal, vnegq_s8(poly), poly);
-
- return poly;
-}
-
-inline qint16x8_t vlogq_qs16(qint16x8_t a, int fixed_point_position)
-{
- const qint16x8_t const_one = vdupq_n_s16(1 << fixed_point_position);
- const qint16x8_t const_fifteen_dec = vdupq_n_s16(15);
- const qint16x8_t const_ln2 = vdupq_n_s16(0x58B9 >> (15 - fixed_point_position)); // ln(2)
-
- // If 0 < a < 1, calculate log(1/x)
- uint16x8_t calc_reciprocal = vcltq_s16(a, const_one);
- qint16x8_t recip = vdupq_n_s16(0);
- recip = vbslq_s16(calc_reciprocal, a, recip);
-
- // Calculate reciprocal
- recip = vqrecipq_qs16(recip, fixed_point_position);
- a = vbslq_s16(calc_reciprocal, recip, a);
-
- // Get decimal part of a
- qint16x8_t shift_value = vdupq_n_s16(-fixed_point_position);
- qint16x8_t dec_a = vshlq_s16(a, shift_value); // a >> fixed_point_position
-
- // Get exponent of 2^n which is equal or less than dec_a
- shift_value = vqsubq_s16(const_fifteen_dec, vclzq_s16(dec_a));
-
- // Get x to range (1, 2]
- const qint16x8_t shift_value_neg = vnegq_s16(shift_value);
- const qint16x8_t temp = vqsubq_s16(vrshlq_s16(a, shift_value_neg), const_one);
- const qint16x8_t sum = vmulq_s16(shift_value, const_one);
-
- // Polynomial Approximation
- qint16x8_t poly = vtaylor_polyq_qs16<true>(temp, fixed_point_position);
-
- // Reconstruct
- poly = vqmulq_qs16(vqaddq_s16(poly, sum), const_ln2, fixed_point_position);
-
- // Set negative value for 0 < a < 1
- poly = vbslq_s16(calc_reciprocal, vnegq_s16(poly), poly);
-
- return poly;
-}
-
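The log kernels use the complementary decomposition: for inputs of at least one, write a = 2^n * m with m in (1, 2], so that ln(a) = (n + log2(m)) * ln(2), and approximate log2(m) with the polynomial; inputs below one go through the reciprocal first and the sign is flipped at the end. A floating-point sketch of the identity, where std::log2 stands in for the polynomial (names are illustrative):

    #include <cmath>

    static inline float log_by_normalisation(float a)
    {
        int   n = 0;
        float m = std::frexp(a, &n); // a = m * 2^n with m in [0.5, 1)
        m *= 2.0f;                   // move the mantissa into [1, 2)
        --n;
        return (static_cast<float>(n) + std::log2(m)) * std::log(2.0f);
    }
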
-inline qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position)
-{
- const qint8x8_t const_three = vdup_n_s8(3 << fixed_point_position);
-
- // Find shift value. Number must be in (0.5, 2) range.
- qint8x8_t shift_value = vneg_s8(vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))));
-
-    // Add one when the shift value is negative in order to get the correct result when we shift right by 1
- qint8x8_t temp = vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)));
- uint8x8_t temp_ltz = vclt_s8(temp, vdup_n_qs8(0));
- temp = vbsl_s8(temp_ltz, vadd_s8(temp, vdup_n_s8(1)), temp);
- qint8x8_t shift_value2 = vneg_s8(vshr_n_s8(temp, 1));
-
- temp = vshl_s8(a, shift_value);
-
- // Initial guess
- qint8x8_t x = temp;
-
- // Calculate (x / 2) * (3 - a * x^2)
- // After three iterations we have the result for 8 bit
- x = vshr_n_s8(vmul_qs8(x, vsub_s8(const_three, vmul_qs8(temp, vmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshr_n_s8(vmul_qs8(x, vsub_s8(const_three, vmul_qs8(temp, vmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshr_n_s8(vmul_qs8(x, vsub_s8(const_three, vmul_qs8(temp, vmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
-
- return vshl_s8(x, shift_value2);
-}
-
-inline qint16x4_t vinvsqrt_qs16(qint16x4_t a, int fixed_point_position)
-{
- const qint16x4_t const_three = vdup_n_s16(3 << fixed_point_position);
-
- // Find shift value. Number must be in (0.5, 2) range.
- qint16x4_t shift_value = vneg_s16(vsub_s16(vdup_n_s16(16), vadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position))));
-
-    // Add one when the shift value is negative in order to get the correct result when we shift right by 1
- qint16x4_t temp = vsub_s16(vdup_n_s16(16), vadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position)));
- uint16x4_t temp_ltz = vclt_s16(temp, vdup_n_qs16(0));
- temp = vbsl_s16(temp_ltz, vadd_s16(temp, vdup_n_s16(1)), temp);
- qint16x4_t shift_value2 = vneg_s16(vshr_n_s16(temp, 1));
-
- temp = vshl_s16(a, shift_value);
-
- // Initial guess
- qint16x4_t x = temp;
-
- // Calculate (x / 2) * (3 - a * x^2)
-    // After five iterations we have the result for 16 bit
- x = vshr_n_s16(vmul_qs16(x, vsub_s16(const_three, vmul_qs16(temp, vmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshr_n_s16(vmul_qs16(x, vsub_s16(const_three, vmul_qs16(temp, vmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshr_n_s16(vmul_qs16(x, vsub_s16(const_three, vmul_qs16(temp, vmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshr_n_s16(vmul_qs16(x, vsub_s16(const_three, vmul_qs16(temp, vmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshr_n_s16(vmul_qs16(x, vsub_s16(const_three, vmul_qs16(temp, vmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
-
- return vshl_s16(x, shift_value2);
-}
-
-inline qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position)
-{
- const qint8x8_t const_three = vdup_n_s8(3 << fixed_point_position);
-
- // Find shift value. Number must be in (0.5, 2) range.
- qint8x8_t shift_value = vqneg_s8(vqsub_s8(vdup_n_s8(8), vqadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))));
-
-    // Add one when the shift value is negative in order to get the correct result when we shift right by 1
- qint8x8_t temp = vqsub_s8(vdup_n_s8(8), vqadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)));
- uint8x8_t temp_ltz = vclt_s8(temp, vdup_n_qs8(0));
- temp = vbsl_s8(temp_ltz, vqadd_s8(temp, vdup_n_s8(1)), temp);
- qint8x8_t shift_value2 = vqneg_s8(vshr_n_s8(temp, 1));
-
- temp = vqshl_s8(a, shift_value);
-
- // Initial guess
- qint8x8_t x = temp;
-
- // Calculate (x / 2) * (3 - a * x^2)
- // After three iterations we have the result for 8 bit
- x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
-
- return vqshl_s8(x, shift_value2);
-}
-
-inline qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position)
-{
- const qint16x4_t const_three = vdup_n_s16(3 << fixed_point_position);
-
- // Find shift value. Number must be in (0.5, 2) range.
- qint16x4_t shift_value = vqneg_s16(vqsub_s16(vdup_n_s16(16), vqadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position))));
-
-    // Add one when the shift value is negative in order to get the correct result when we shift right by 1
- qint16x4_t temp = vqsub_s16(vdup_n_s16(16), vqadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position)));
- uint16x4_t temp_ltz = vclt_s16(temp, vdup_n_qs16(0));
- temp = vbsl_s16(temp_ltz, vqadd_s16(temp, vdup_n_s16(1)), temp);
- qint16x4_t shift_value2 = vqneg_s16(vshr_n_s16(temp, 1));
-
- temp = vqshl_s16(a, shift_value);
-
- // Initial guess
- qint16x4_t x = temp;
-
- // Calculate (x / 2) * (3 - a * x^2)
- // After five iterations we have the result for 16 bit
- x = vshr_n_s16(vqmul_qs16(x, vqsub_s16(const_three, vqmul_qs16(temp, vqmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshr_n_s16(vqmul_qs16(x, vqsub_s16(const_three, vqmul_qs16(temp, vqmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshr_n_s16(vqmul_qs16(x, vqsub_s16(const_three, vqmul_qs16(temp, vqmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshr_n_s16(vqmul_qs16(x, vqsub_s16(const_three, vqmul_qs16(temp, vqmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshr_n_s16(vqmul_qs16(x, vqsub_s16(const_three, vqmul_qs16(temp, vqmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
-
- return vqshl_s16(x, shift_value2);
-}
-
-inline qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position)
-{
- const qint8x16_t const_three = vdupq_n_s8(3 << fixed_point_position);
-
- // Find shift value. Number must be in (0.5, 2) range.
- qint8x16_t shift_value = vnegq_s8(vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))));
-
-    // Add one when the shift value is negative in order to get the correct result when we shift right by 1
- qint8x16_t temp = vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)));
- uint8x16_t temp_ltz = vcltq_s8(temp, vdupq_n_qs8(0));
- temp = vbslq_s8(temp_ltz, vaddq_s8(temp, vdupq_n_s8(1)), temp);
- qint8x16_t shift_value2 = vnegq_s8(vshrq_n_s8(temp, 1));
-
- temp = vshlq_s8(a, shift_value);
-
- // Initial guess
- qint8x16_t x = temp;
-
- // Calculate (x / 2) * (3 - a * x^2)
- // After three iterations we have the result for 8 bit
- x = vshrq_n_s8(vmulq_qs8(x, vsubq_s8(const_three, vmulq_qs8(temp, vmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshrq_n_s8(vmulq_qs8(x, vsubq_s8(const_three, vmulq_qs8(temp, vmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshrq_n_s8(vmulq_qs8(x, vsubq_s8(const_three, vmulq_qs8(temp, vmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
-
- return vshlq_s8(x, shift_value2);
-}
-
-inline qint16x8_t vinvsqrtq_qs16(qint16x8_t a, int fixed_point_position)
-{
- const qint16x8_t const_three = vdupq_n_s16(3 << fixed_point_position);
-
- // Find shift value. Number must be in (0.5, 2) range.
- qint16x8_t shift_value = vnegq_s16(vsubq_s16(vdupq_n_s16(16), vaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position))));
-
-    // Add one when the shift value is negative in order to get the correct result when we shift right by 1
- qint16x8_t temp = vsubq_s16(vdupq_n_s16(16), vaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position)));
- uint16x8_t temp_ltz = vcltq_s16(temp, vdupq_n_qs16(0));
- temp = vbslq_s16(temp_ltz, vaddq_s16(temp, vdupq_n_s16(1)), temp);
- qint16x8_t shift_value2 = vnegq_s16(vshrq_n_s16(temp, 1));
-
- temp = vshlq_s16(a, shift_value);
-
- // Initial guess
- qint16x8_t x = temp;
-
- // Calculate (x / 2) * (3 - a * x^2)
- // After five iterations we have the result for 16 bit
- x = vshrq_n_s16(vmulq_qs16(x, vsubq_s16(const_three, vmulq_qs16(temp, vmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshrq_n_s16(vmulq_qs16(x, vsubq_s16(const_three, vmulq_qs16(temp, vmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshrq_n_s16(vmulq_qs16(x, vsubq_s16(const_three, vmulq_qs16(temp, vmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshrq_n_s16(vmulq_qs16(x, vsubq_s16(const_three, vmulq_qs16(temp, vmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshrq_n_s16(vmulq_qs16(x, vsubq_s16(const_three, vmulq_qs16(temp, vmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
-
- return vshlq_s16(x, shift_value2);
-}
-
-inline qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position)
-{
- const qint8x16_t const_three = vdupq_n_s8(3 << fixed_point_position);
-
- // Find shift value. Number must be in (0.5, 2) range.
- qint8x16_t shift_value = vqnegq_s8(vqsubq_s8(vdupq_n_s8(8), vqaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))));
-
-    // Add one when the shift value is negative in order to get the correct result when we shift right by 1
- qint8x16_t temp = vqsubq_s8(vdupq_n_s8(8), vqaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)));
- uint8x16_t temp_ltz = vcltq_s8(temp, vdupq_n_qs8(0));
- temp = vbslq_s8(temp_ltz, vqaddq_s8(temp, vdupq_n_s8(1)), temp);
- qint8x16_t shift_value2 = vqnegq_s8(vshrq_n_s8(temp, 1));
-
- temp = vqshlq_s8(a, shift_value);
-
- // Initial guess
- qint8x16_t x = temp;
-
- // Calculate (x / 2) * (3 - a * x^2)
- // After three iterations we have the result for 8 bit
- x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
-
- return vqshlq_s8(x, shift_value2);
-}
-
-inline qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position)
-{
- const qint16x8_t const_three = vdupq_n_s16(3 << fixed_point_position);
-
- // Find shift value. Number must be in (0.5, 2) range.
- qint16x8_t shift_value = vqnegq_s16(vqsubq_s16(vdupq_n_s16(16), vqaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position))));
-
-    // Add one when the shift value is negative in order to get the correct result when we shift right by 1
- qint16x8_t temp = vqsubq_s16(vdupq_n_s16(16), vqaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position)));
- uint16x8_t temp_ltz = vcltq_s16(temp, vdupq_n_qs16(0));
- temp = vbslq_s16(temp_ltz, vqaddq_s16(temp, vdupq_n_s16(1)), temp);
- qint16x8_t shift_value2 = vqnegq_s16(vshrq_n_s16(temp, 1));
-
- temp = vqshlq_s16(a, shift_value);
-
- // Initial guess
- qint16x8_t x = temp;
-
- // Calculate (x / 2) * (3 - a * x^2)
- // After five iterations we have the result for 16 bit
- x = vshrq_n_s16(vqmulq_qs16(x, vqsubq_s16(const_three, vqmulq_qs16(temp, vqmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshrq_n_s16(vqmulq_qs16(x, vqsubq_s16(const_three, vqmulq_qs16(temp, vqmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshrq_n_s16(vqmulq_qs16(x, vqsubq_s16(const_three, vqmulq_qs16(temp, vqmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshrq_n_s16(vqmulq_qs16(x, vqsubq_s16(const_three, vqmulq_qs16(temp, vqmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
- x = vshrq_n_s16(vqmulq_qs16(x, vqsubq_s16(const_three, vqmulq_qs16(temp, vqmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1);
-
- return vqshlq_s16(x, shift_value2);
-}
-
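The inverse-square-root kernels apply the same leading-zero-based normalisation and then iterate x <- x * (3 - a * x^2) / 2, the Newton-Raphson step for 1/sqrt(a), seeding with the normalised input itself; the 8-bit paths run three iterations and the 16-bit paths five. A floating-point sketch (nr_inverse_sqrt is an illustrative name):

    static inline float nr_inverse_sqrt(float a_normalised, int iterations)
    {
        float x = a_normalised; // initial guess, as in the kernels above
        for (int i = 0; i < iterations; ++i)
        {
            // Newton-Raphson step for f(x) = 1/x^2 - a
            x = 0.5f * x * (3.0f - a_normalised * x * x);
        }
        return x;
    }
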
-inline qint8x8_t vqtanh_qs8(qint8x8_t a, int fixed_point_position)
-{
- const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position);
- const qint8x8_t const_two = vdup_n_s8(2 << fixed_point_position);
-
- const qint8x8_t exp2x = vqexp_qs8(vqmul_qs8(const_two, a, fixed_point_position), fixed_point_position);
- const qint8x8_t num = vqsub_qs8(exp2x, const_one);
- const qint8x8_t den = vqadd_qs8(exp2x, const_one);
- const qint8x8_t tanh = vqmul_qs8(num, vqrecip_qs8(den, fixed_point_position), fixed_point_position);
-
- return tanh;
-}
-
-inline qint16x4_t vqtanh_qs16(qint16x4_t a, int fixed_point_position)
-{
- const qint16x4_t const_one = vdup_n_s16(1 << fixed_point_position);
- const qint16x4_t const_two = vdup_n_s16(2 << fixed_point_position);
-
- const qint16x4_t exp2x = vqexp_qs16(vqmul_qs16(const_two, a, fixed_point_position), fixed_point_position);
- const qint16x4_t num = vqsub_qs16(exp2x, const_one);
- const qint16x4_t den = vqadd_qs16(exp2x, const_one);
- const qint16x4_t tanh = vqmul_qs16(num, vqrecip_qs16(den, fixed_point_position), fixed_point_position);
-
- return tanh;
-}
-
-inline qint8x16_t vqtanhq_qs8(qint8x16_t a, int fixed_point_position)
-{
- const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position);
- const qint8x16_t const_two = vdupq_n_s8(2 << fixed_point_position);
-
- const qint8x16_t exp2x = vqexpq_qs8(vqmulq_qs8(const_two, a, fixed_point_position), fixed_point_position);
- const qint8x16_t num = vqsubq_qs8(exp2x, const_one);
- const qint8x16_t den = vqaddq_qs8(exp2x, const_one);
- const qint8x16_t tanh = vqmulq_qs8(num, vqrecipq_qs8(den, fixed_point_position), fixed_point_position);
-
- return tanh;
-}
-
-inline qint16x8_t vqtanhq_qs16(qint16x8_t a, int fixed_point_position)
-{
- const qint16x8_t const_one = vdupq_n_s16(1 << fixed_point_position);
- const qint16x8_t const_two = vdupq_n_s16(2 << fixed_point_position);
-
- const qint16x8_t exp2x = vqexpq_qs16(vqmulq_qs16(const_two, a, fixed_point_position), fixed_point_position);
- const qint16x8_t num = vqsubq_qs16(exp2x, const_one);
- const qint16x8_t den = vqaddq_qs16(exp2x, const_one);
- const qint16x8_t tanh = vqmulq_qs16(num, vqrecipq_qs16(den, fixed_point_position), fixed_point_position);
-
- return tanh;
-}
-
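tanh is computed from a single exponential via the identity tanh(x) = (exp(2x) - 1) / (exp(2x) + 1), which is what the kernels above assemble out of vqexp*, vqsub*, vqadd* and vqrecip*. The same identity in plain floating point:

    #include <cmath>

    static inline float tanh_via_exp(float x)
    {
        const float e2x = std::exp(2.0f * x);
        return (e2x - 1.0f) / (e2x + 1.0f);
    }
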
-inline qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position)
-{
- return vqexpq_qs8(vqmulq_qs8(b, vlogq_qs8(a, fixed_point_position), fixed_point_position), fixed_point_position);
-}
-
-inline qint16x8_t vqpowq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position)
-{
- return vqexpq_qs16(vqmulq_qs16(b, vlogq_qs16(a, fixed_point_position), fixed_point_position), fixed_point_position);
-}
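Finally, vqpowq_* builds the power function from the two previous kernels through a^b = exp(b * ln(a)), valid for a > 0; in floating point:

    #include <cmath>

    static inline float pow_via_exp_log(float a, float b)
    {
        return std::exp(b * std::log(a));
    }
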
inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b)
{
diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
index 06a0a01782..0290e32085 100644
--- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
@@ -24,7 +24,6 @@
#ifndef __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__
#define __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/QAsymm8.h"
@@ -59,7 +58,7 @@ public:
* @note If the output tensor is a nullptr, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * of the activation function. Data types supported: QASYMM8/F16/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] activation_info Activation layer information.
*/
@@ -67,7 +66,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * of the activation function. Data types supported: QASYMM8/F16/F32.
* @param[in] output Destination tensor info. Data type supported: same as @p input
* @param[in] act_info Activation layer information.
*
@@ -104,19 +103,7 @@ private:
* @param[in] window Region on which to execute the kernel
*/
template <ActivationLayerInfo::ActivationFunction F, typename T>
- typename std::enable_if<std::is_same<T, qint8_t>::value, void>::type activation(const Window &window);
- /** Function to apply an activation function on a tensor.
- *
- * @param[in] window Region on which to execute the kernel
- */
- template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type activation(const Window &window);
- /** Function to apply an activation function on a tensor.
- *
- * @param[in] window Region on which to execute the kernel
- */
- template <ActivationLayerInfo::ActivationFunction F, typename T>
- typename std::enable_if<std::is_same<T, qint16_t>::value, void>::type activation(const Window &window);
private:
ITensor *_input;
diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
index 155e792f5d..8cf21eae9d 100644
--- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
@@ -57,26 +57,24 @@ public:
* Valid configurations (Input1,Input2) -> Output :
*
* - (U8,U8) -> U8
- * - (QS8,QS8) -> QS8
* - (U8,U8) -> S16
* - (S16,U8) -> S16
* - (U8,S16) -> S16
* - (S16,S16) -> S16
- * - (QS16,QS16) -> QS16
* - (F16,F16) -> F16
* - (F32,F32) -> F32
*
- * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[out] output The output tensor. Data types supported: U8/S16/F16/F32.
* @param[in] policy Overflow policy.
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel
*
- * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] output The output tensor. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[in] output The output tensor. Data types supported: U8/S16/F16/F32.
* @param[in] policy Overflow policy.
*
* @return a status
@@ -90,9 +88,9 @@ public:
private:
/** Common signature for all the specialised add functions
*
- * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[out] output The output tensor. Data types supported: U8/S16/F16/F32.
* @param[in] window Region on which to execute the kernel.
*/
using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
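As a usage note for the configure()/validate() pair shown above, here is a hedged sketch of an F32 + F32 -> F32 addition; the scheduler call and tensor setup are assumed from the surrounding runtime and are not part of this patch:

```cpp
#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"

using namespace arm_compute;

// Hedged sketch: drive the kernel API documented above for F32 + F32 -> F32.
// Tensor creation/allocation is assumed to be handled by the caller.
void add_f32(ITensor &a, ITensor &b, ITensor &dst)
{
    // The static validate() mirrors configure() but operates on ITensorInfo.
    ARM_COMPUTE_ERROR_THROW_ON(NEArithmeticAdditionKernel::validate(a.info(), b.info(), dst.info(), ConvertPolicy::SATURATE));

    NEArithmeticAdditionKernel add;
    add.configure(&a, &b, &dst, ConvertPolicy::SATURATE);
    NEScheduler::get().schedule(&add, Window::DimY);
}
```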
diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
index 73ecfcfeb5..3e93922b65 100644
--- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
@@ -57,26 +57,24 @@ public:
* Valid configurations (Input1,Input2) -> Output :
*
* - (U8,U8) -> U8
- * - (QS8,QS8) -> QS8
* - (U8,U8) -> S16
* - (S16,U8) -> S16
* - (U8,S16) -> S16
* - (S16,S16) -> S16
- * - (QS16,QS16) -> QS16
* - (F16,F16) -> F16
* - (F32,F32) -> F32
*
- * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[out] output The output tensor. Data types supported: U8/S16/F16/F32.
* @param[in] policy Overflow policy.
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel
*
- * @param[in] input1 First tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] input2 Second tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] output Output tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
+ * @param[in] input1 First tensor input. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 Second tensor input. Data types supported: U8/S16/F16/F32
+ * @param[in] output Output tensor. Data types supported: U8/S16/F16/F32
* @param[in] policy Policy to use to handle overflow.
*
* @return a status
@@ -89,9 +87,9 @@ public:
private:
/** Common signature for all the specialised sub functions
*
- * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[out] output The output tensor. Data types supported: U8/S16/F16/F32.
* @param[in] window Region on which to execute the kernel.
*/
using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
index 2d33f87dfa..2a540c151b 100644
--- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
@@ -57,7 +57,7 @@ public:
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
* 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+ * The rest are optional and used for representing batches. Data types supported: F16/F32.
* @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
* @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
@@ -72,7 +72,7 @@ public:
*
* @param[in] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result.
* 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+ * The rest are optional and used for representing batches. Data types supported: F16/F32.
* @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
* @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
@@ -96,22 +96,7 @@ private:
void configure_non_fused();
/** Configure execution function in case of fused activation **/
void configure_fused();
- /** Template function to run batch normalization on 8-bit fixed point
- *
- * @tparam fused_activation Boolean that flags if its a fused activation or not
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool fused_activation>
- void batch_normalization_qs8(const Window &window);
- /** Template function to run batch normalization on 16-bit fixed point
- *
- * @tparam fused_activation Boolean that flags if its a fused activation or not
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool fused_activation>
- void batch_normalization_qs16(const Window &window);
+
/** Template function to run batch normalization on fp16
*
* @tparam fused_activation Boolean that flags if its a fused activation or not
diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h
index 9fb493cc4f..f02858e7d9 100644
--- a/arm_compute/core/NEON/kernels/NECol2ImKernel.h
+++ b/arm_compute/core/NEON/kernels/NECol2ImKernel.h
@@ -72,7 +72,7 @@ public:
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to convert. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
* while the rest represent batch of outputs. Data types supported: Same as @p input
* @param[in] convolved_dims Output convolved dimensions.
@@ -80,7 +80,7 @@ public:
void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims);
/** Static function to check if given info will lead to a valid configuration of @ref NECol2ImKernel
*
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to convert. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
* while the rest represent batch of outputs. Data types supported: Same as @p input
* @param[in] convolved_dims Output convolved dimensions.
diff --git a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h b/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h
index 65ce764246..d5c9e3bbe9 100644
--- a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h
+++ b/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h
@@ -59,7 +59,7 @@ public:
~NEConvertFullyConnectedWeightsKernel() = default;
/** Set the input and output tensor.
*
- * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/QS32/F16/F32.
+ * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/QS32/F16/F32.
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). Must be in NCHW format.
* @param[in] data_layout The data layout the weights have been trained in.
@@ -67,7 +67,7 @@ public:
void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeightsKernel
*
- * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/QS32/F16/F32.
+ * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/QS32/F16/F32.
* @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). Must be in NCHW format.
* @param[in] data_layout The data layout the weights have been trained in.
diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h
index 67ef5293b7..12a5051ef8 100644
--- a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h
@@ -55,7 +55,7 @@ public:
~NEDepthConcatenateLayerKernel() = default;
/** Initialise the kernel's inputs and output
*
- * @param[in] input Input tensor. Data types supported: QS8/QS16/F16/F32.
+ * @param[in] input Input tensor. Data types supported: F16/F32.
* @param[in] depth_offset The offset on the Z axis.
* @param[in,out] output Output tensor. Data types supported: Same as @p input.
*
diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
index 50536f2b47..77bb0413ca 100644
--- a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
@@ -55,19 +55,12 @@ public:
*
* Valid conversions Input -> Output :
*
- * - QS8 -> QS8, F32
* - U8 -> U16, S16, S32
* - U16 -> U8, U32
* - S16 -> U8, S32
- * - QS16 -> QS16, F32
- * - F32 -> QS8
*
- * @warning In case of in-place fixed point position conversion make sure that configure has been called
- * before the updated tensor is used in other functions, as the TensorInfo of the tensor will be
- * altered. In-place is only supported for QS8 -> QS8, QS16 -> QS16.
- *
- * @param[in, out] input The input tensor to convert (Written in case of in-place computation). Data types supported: U8/QS8/U16/S16/F32.
- * @param[out] output The output tensor. Can be null in case of in-place computation. Data types supported: U8/QS8/U16/S16/U32/S32/F32.
+ * @param[in, out] input The input tensor to convert (Written in case of in-place computation). Data types supported: U8/U16/S16.
+ * @param[out] output The output tensor. Can be null in case of in-place computation. Data types supported: U8/U16/S16/U32/S32/F32.
* @param[in] policy Conversion policy.
* @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
* In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place.
@@ -82,8 +75,6 @@ private:
ITensor *_output;
ConvertPolicy _policy;
uint32_t _shift;
- int _fixed_point_position_input;
- int _fixed_point_position_output;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ */
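With the QS8/QS16 rows gone, the remaining conversions are plain widening/narrowing integer casts plus the optional shift. A hedged usage sketch of a U8 -> S16 up-conversion (the configure() arguments follow the parameter list documented above; tensor setup and the scheduler call are assumptions, not part of this patch):

```cpp
#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"

using namespace arm_compute;

// Hedged sketch: U8 -> S16 up-conversion with a left shift of 1, matching the
// "U8 -> U16, S16, S32" row above. Tensors are assumed to be allocated by the caller.
void convert_u8_to_s16(ITensor &src_u8, ITensor &dst_s16)
{
    NEDepthConvertLayerKernel convert;
    convert.configure(&src_u8, &dst_s16, ConvertPolicy::SATURATE, 1 /* shift */);
    NEScheduler::get().schedule(&convert, Window::DimY);
}
```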
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
index f859f97dae..589725ab01 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
@@ -57,24 +57,24 @@ public:
* 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3
*
* @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/F16/F32.
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
* The 3rd dimension must be the same as the input's volume 3rd dimension.
* Data type supported:Same as @p input.
* @param[out] output Output tensor.
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS16/QS32/F16/F32
+ * The 3rd dimension must be equal to the 4th dimension of the @p kernels tensor. Data types supported: F16/F32
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
*/
void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
/** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerKernel
*
* @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/F16/F32.
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
* The 3rd dimension must be the same as the input's volume 3rd dimension.
* Data type supported:Same as @p input.
* @param[in] output Output tensor.
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS16/QS32/F16/F32
+ * The 3rd dimension must be equal to the 4th dimension of the @p kernels tensor. Data types supported: QS32/F16/F32
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
*
* @return a status
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
index 77711d7ecd..7fd1d70374 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
@@ -55,10 +55,10 @@ public:
/** Set the accumulate buffer and the biases of the kernel.
*
* @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: QS16/QS32/F16/F32
+ * Data type supported: QS32/F16/F32
* @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
* @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Data type supported: QS8/QS16/F16/F32
+ * Data type supported: F16/F32
* @param[in] result_fixedpoint_multiplier (Optional)Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
* @param[in] result_shift (Optional)Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
* @param[in] result_offset_after_shift (Optional)Offset to be applied to result before converting it back to QASYMM8
@@ -68,10 +68,10 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel
*
* @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: QS16/QS32/F16/F32
+ * Data type supported: QS32/F16/F32
* @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
* @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Data type supported: QS8/QS16/F16/F32
+ * Data type supported: F16/F32
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr);
diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
index dd19b8f35a..cff6b4ea2d 100644
--- a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
+++ b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
@@ -57,7 +57,7 @@ public:
*
* @note This kernel fills the borders within the XY-planes.
*
- * @param[in,out] tensor Tensor to process. Data types supported: U8/S8/QS8/QASYMM8/QS16/S16/S32/F32.
+ * @param[in,out] tensor Tensor to process. Data types supported: U8/S8/QASYMM8/S16/S32/F32.
* @param[in] border_size Size of the border to fill in elements.
* @param[in] border_mode Border mode to use for the convolution.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
diff --git a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h
index 545a265dc2..2b6c7af72a 100644
--- a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h
+++ b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h
@@ -57,7 +57,7 @@ public:
*
* @note This kernel fills the borders within the XY-planes.
*
- * @param[in,out] input Tensor to process. Data types supported: U8/QS8/S16/S32/F32.
+ * @param[in,out] input Tensor to process. Data types supported: U8/S16/S32/F32.
* @param[in] border_size Size of the border to fill in elements.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
*
diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
index 79504fd4da..5c0104d138 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
@@ -60,13 +60,13 @@ public:
NEGEMMInterleave4x4Kernel();
/** Initialise the kernel's input and output.
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/QS16/U16/S16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input.
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMInterleave4x4Kernel
*
- * @param[in] input Input tensor info. Data types supported: U8/S8/QS8/QASYMM8/QS16/U16/S16/F16/U32/S32/F32
+ * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
*
* @return a status
@@ -79,7 +79,7 @@ public:
private:
/** Common signature for all the transpose functions
*
- * @param[in] input An input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input An input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output The output tensor. Data type supported: same as @p input
* @param[in] window Region on which to execute the kernel.
*/
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h
index e48a9a77e4..419a9f9150 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h
@@ -51,13 +51,13 @@ public:
~NEGEMMMatrixAccumulateBiasesKernel() = default;
/** Set the accumulate buffer and the biases of the kernel.
*
- * @param[in, out] accum The accumulate tensor to convert. Data type supported: QS8/QS16/F32
+ * @param[in, out] accum The accumulate tensor to convert. Data type supported: F32
* @param[in] biases The shared biases tensor to append. It must be 1D Tensor. Data type supported: Same as @p input
*/
void configure(ITensor *accum, const ITensor *biases);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAccumulateBiasesKernel
*
- * @param[in] accum The accumulate tensor to convert. Data type supported: QS8/QS16/F32
+ * @param[in] accum The accumulate tensor to convert. Data type supported: F32
* @param[in] biases The shared biases tensor to append. It must be 1D Tensor. Data type supported: Same as @p input
*
* @return a status
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
index 5e4f8b72ff..1a235933dc 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
@@ -59,7 +59,7 @@ public:
*
* @note The input and output tensor must have the same dimensions
*
- * @param[in] input Input tensor (Matrix C). Data types supported: QS8/QS16/F16/F32
+ * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32
* @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
* @param[in] beta Weight of matrix C
*/
@@ -71,7 +71,7 @@ public:
private:
/** Common signature for all the matrix addition functions
*
- * @param[in] input An input tensor. Data types supported: QS8/QS16/F16/F32
+ * @param[in] input An input tensor. Data types supported: F16/F32
* @param[out] output The output tensor. Data type supported: same as @p input
* @param[in] window Region on which to execute the kernel.
* @param[in] beta Weight of matrix C
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
index d54522c678..6ee958205e 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
@@ -58,7 +58,7 @@ public:
* @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
* These two kernels change the layout of the original matrices to be more cache-friendly.
*
- * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: QS8/QS16/F16/F32
+ * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
* @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
* If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
* @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
@@ -69,7 +69,7 @@ public:
void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixMultiplyKernel
*
- * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: QS8/QS16/F16/F32
+ * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
* @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
* If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
* @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
index fcdd8dd93c..b7fbfcfcd2 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
@@ -74,13 +74,13 @@ public:
}
/** Initialise the kernel's input and output.
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input.
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xWKernel
*
- * @param[in] input Input tensor info. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output Output tensor info. Data type supported: same as @p input.
*
* @return a status
diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
index 5aa803f4fd..d455fd98b3 100644
--- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
+++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
@@ -77,7 +77,7 @@ public:
/** Set the input and output of the kernel.
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/QASYMM8/F16/F32
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
* Note: QASYMM8 works only for has_bias = false
* @param[out] output The output tensor. Data types supported: Same as @p input
* @param[in] kernel_dims The kernel dimensions (width and height).
@@ -92,7 +92,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/QASYMM8/F16/F32
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
* Note: QASYMM8 works only for has_bias = false
* @param[in] output The output tensor. Data types supported: Same as @p input
* @param[in] kernel_dims The kernel dimensions (width and height).
diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
index 6ae7b73423..92086437a6 100644
--- a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
@@ -54,7 +54,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/FP16/F32.
+ * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32.
* @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM],
* Data type supported: same as @p input
* @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
@@ -64,7 +64,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NENormalizationLayerKernel
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/FP16/F32.
+ * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32.
* @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM],
* Data type supported: same as @p input
* @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
@@ -92,18 +92,6 @@ private:
template <DataType dt, unsigned int dim, bool do_2D_norm>
void normalize_float(const Window &window);
- /** Function to perform normalization for fixed-point values depending on
- * the given template dimension. The second template parameter specifies
- * whether the normalization has to be 1D or 2D.
- *
- * @note Only supported normalizations are:
- * - 1D over X or Z
- * - 2D over X and Y
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <DataType dt, unsigned int dim, bool do_2D_norm>
- void normalize_fixed_point(const Window &window);
/** Common signature for all the specialised normalization functions
*
* @param[in] window Region on which to execute the kernel.
diff --git a/arm_compute/core/NEON/kernels/NEPermuteKernel.h b/arm_compute/core/NEON/kernels/NEPermuteKernel.h
index 68bbdcb3cb..b56faa8514 100644
--- a/arm_compute/core/NEON/kernels/NEPermuteKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPermuteKernel.h
@@ -58,7 +58,7 @@ public:
*
* @note Supported permutation vectors : [2, 0, 1], [1, 2, 0]
*
- * @param[in] input The input tensor to permute. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output The output tensor. Data types supported: Same as @p input
* @param[in] perm Permutation vector
*/
@@ -67,7 +67,7 @@ public:
*
* @note Supported permutation vectors : [2, 0, 1], [1, 2, 0]
*
- * @param[in] input The input tensor to permute. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output The output tensor. Data types supported: Same as @p input
* @param[in] perm Permutation vector
*
diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
index 8c245569a5..41ea91495f 100644
--- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
@@ -55,11 +55,10 @@ public:
*
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
- * For QS8/QS16 scale = 1 is the only supported value.
*
- * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8, QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
- * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+ * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8, S16/F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+ * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy.
@@ -70,11 +69,10 @@ public:
*
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
- * For QS8/QS16 scale = 1 is the only supported value.
*
- * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8, QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
- * @param[in] output The output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+ * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8, S16/F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+ * @param[in] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy.
@@ -96,15 +94,6 @@ private:
* @param[out] output_ptr Pointer to the output tensor.
*/
using MulFunctionInt = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int scale);
- /** Common signature for all the specialised multiplication functions with fixed-point values
- *
- * @param[in] input1_ptr Pointer to the first input tensor.
- * @param[in] input2_ptr Pointer to the second input tensor.
- * @param[in] scale Scaling factor.
- * @param[in] fixed_point_position Fixed-point position that expresses the number of bits for the fractional part of the number.
- * @param[out] output_ptr Pointer to the output tensor.
- */
- using MulFunctionQInt = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int scale, int fixed_point_position);
/** Common signature for all the specialised multiplication functions with float scaling factor
*
* @param[in] input1_ptr Pointer to the first input tensor.
@@ -115,7 +104,6 @@ private:
MulFunctionFloat *_func_float;
MulFunctionInt *_func_int;
- MulFunctionQInt *_func_q_int;
private:
const ITensor *_input1;
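The scale restriction documented above (scale must be 1/255 or 1/2^n with n between 0 and 15) is easy to mis-read; a small hypothetical checker, not library code, spells it out:

```cpp
// Hypothetical helper (not part of the library): accepts exactly the scales the
// kernel documentation above allows, i.e. 1/255 or 1/2^n with 0 <= n <= 15.
// Exact float comparison is intentional: callers are expected to pass the same
// literal constants, and 1/2^n is exactly representable.
inline bool is_valid_pixelwise_scale(float scale)
{
    if(scale == 1.f / 255.f)
    {
        return true;
    }
    for(int n = 0; n <= 15; ++n)
    {
        if(scale == 1.f / static_cast<float>(1 << n))
        {
            return true;
        }
    }
    return false;
}
```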
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
index 4140ccf1ed..6c4c1db289 100644
--- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
@@ -52,18 +52,18 @@ public:
~NEPoolingLayerKernel() = default;
/** Set the input and output tensors.
*
- * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only
+ * @note F16 is supported for pool sizes 2 and 3 only
*
- * @param[in] input Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*/
void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info);
/** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayerKernel
*
- * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only
+ * @note F16 is supported for pool sizes 2 and 3 only
*
- * @param[in] input Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
* @param[in] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*
@@ -90,13 +90,6 @@ private:
*/
template <PoolingType pooling_type, bool exclude_padding = false>
void pooling2_f16_nchw(const Window &window_input, const Window &window);
- /** Function to perform 2x2 pooling for 8bit fixed point.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- */
- template <PoolingType pooling_type>
- void pooling2_q8_nchw(const Window &window_input, const Window &window);
/** Function to perform 2x2 pooling for 8bit asymmetric fixed point.
*
* @param[in] window_input Input region on which to execute the kernel.
@@ -104,13 +97,6 @@ private:
*/
template <PoolingType pooling_type, bool exclude_padding = false>
void pooling2_qasymm8_nchw(const Window &window_input, const Window &window);
- /** Function to perform 2x2 pooling for 16bit fixed point.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- */
- template <PoolingType pooling_type>
- void pooling2_q16_nchw(const Window &window_input, const Window &window);
/** Function to perform 3x3 pooling.
*
* @param[in] window_input Input region on which to execute the kernel.
@@ -125,13 +111,6 @@ private:
*/
template <PoolingType pooling_type, bool exclude_padding = false>
void pooling3_f16_nchw(const Window &window_input, const Window &window);
- /** Function to perform 3x3 pooling for 8bit fixed point.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- */
- template <PoolingType pooling_type>
- void pooling3_q8_nchw(const Window &window_input, const Window &window);
/** Function to perform 3x3 pooling for 8bit quantized fixed point.
*
* @param[in] window_input Input region on which to execute the kernel.
@@ -139,13 +118,6 @@ private:
*/
template <PoolingType pooling_type, bool exclude_padding = false>
void pooling3_qasymm8_nchw(const Window &window_input, const Window &window);
- /** Function to perform 3x3 pooling for 16bit fixed point.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- */
- template <PoolingType pooling_type>
- void pooling3_q16_nchw(const Window &window_input, const Window &window);
/** Function to perform 7x7 pooling.
*
* @param[in] window_input Input region on which to execute the kernel.
@@ -153,13 +125,6 @@ private:
*/
template <PoolingType pooling_type, bool exclude_padding = false>
void pooling7_f32_nchw(const Window &window_input, const Window &window);
- /** Function to perform MxN pooling for 8bit fixed point.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- */
- template <PoolingType pooling_type>
- void poolingMxN_q8_nchw(const Window &window_input, const Window &window);
/** Function to perform MxN pooling for 8-bit quantized.
*
* @param[in] window_input Input region on which to execute the kernel.
@@ -174,13 +139,6 @@ private:
*/
template <PoolingType pooling_type, bool exclude_padding = false>
void poolingMxN_qasymm8_nhwc(const Window &window_input, const Window &window);
- /** Function to perform MxN pooling for 16bit fixed point.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- */
- template <PoolingType pooling_type>
- void poolingMxN_q16_nchw(const Window &window_input, const Window &window);
/** Function to perform MxN pooling for 16-bit floating point values.
*
* @param[in] window_input Input region on which to execute the kernel.
diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h
index 0a3fc44881..08b4e11189 100644
--- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h
@@ -40,7 +40,7 @@ public:
}
/** Set the input and output of the kernel
*
- * @param[in] input Source tensor. Data type supported: U8/S8/QS8/U16/S16/QS16/QASYMM8/U32/S32/F16/F32
+ * @param[in] input Source tensor. Data type supported: U8/S8/U16/S16/QASYMM8/U32/S32/F16/F32
* @param[out] output Destination tensor. Data type supported: Same as @p input
*/
void configure(const ITensor *input, ITensor *output);
diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
index c30a4cd23d..25c3196e34 100644
--- a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
@@ -43,13 +43,13 @@ public:
NELogits1DMaxKernel();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/QS8/QS16/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
* @param[out] output Destination tensor. Data types supported: same as @p input
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NELogits1DMaxKernel
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/QS8/QS16/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
* @param[in] output Destination tensor. Data types supported: same as @p input
*
* @return a status
@@ -90,7 +90,7 @@ public:
~NELogits1DSoftmaxKernel() = default;
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/QS8/QS16/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
* @param[in] max Max values tensor. Same shape as input with dimension 0 set to 1.
* Data types supported: same as @p input.
* @param[out] output Destination tensor. Data types supported: same as @p input.
@@ -101,7 +101,7 @@ public:
void configure(const ITensor *input, const ITensor *max, ITensor *output, const float beta, ITensor *tmp);
/** Static function to check if given info will lead to a valid configuration of @ref NELogits1DSoftmaxKernel
*
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QS8/QS16/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
* @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1.
* Data types supported: same as @p input.
* @param[in] output Destination tensor info. Data types supported: same as @p input.
diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h
index dc7ef8ff7a..76823acfa1 100644
--- a/arm_compute/core/NEON/kernels/NETransposeKernel.h
+++ b/arm_compute/core/NEON/kernels/NETransposeKernel.h
@@ -57,13 +57,13 @@ public:
/** Initialise the kernel's input and output.
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NETransposeKernel
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output Output tensor. Data type supported: Same as @p input
*
* @return a status
@@ -76,7 +76,7 @@ public:
private:
/** Common signature for all the transpose functions
*
- * @param[in] input An input tensor. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input An input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32
* @param[out] output The output tensor. Data type supported: same as @p input
* @param[in] window Region on which to execute the kernel.
*/
diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
index 1a7525bfc7..21f36f6c2b 100644
--- a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
+++ b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
@@ -75,7 +75,7 @@ public:
/** Set the input and output of the kernel.
*
* @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QS8/QASYMM8/QS16/F32
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QASYMM8/F32
* @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
* dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
* @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
@@ -85,7 +85,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEWeightsReshapeKernel
*
* @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QS8/QASYMM8/QS16/F16/F32
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QASYMM8/F16/F32
* @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
* dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
* @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
diff --git a/arm_compute/core/NEON/kernels/detail/NEDirectConvolution3x3.h b/arm_compute/core/NEON/kernels/detail/NEDirectConvolution3x3.h
index fee206638b..fd0c0f0c34 100644
--- a/arm_compute/core/NEON/kernels/detail/NEDirectConvolution3x3.h
+++ b/arm_compute/core/NEON/kernels/detail/NEDirectConvolution3x3.h
@@ -45,13 +45,11 @@ inline float32x4x3_t load_matrix_row(const float *ptr)
}
template <unsigned int stridex>
-float32x4x2_t convolve_3x3(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, int fixed_point_position);
+float32x4x2_t convolve_3x3(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2);
template <>
-inline float32x4x2_t convolve_3x3<1>(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, int fixed_point_position)
+inline float32x4x2_t convolve_3x3<1>(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2)
{
- ARM_COMPUTE_UNUSED(fixed_point_position);
-
const float32x4x3_t vtop =
{
{
@@ -108,9 +106,9 @@ inline float32x4x2_t convolve_3x3<1>(const float *in_top, const float *in_mid, c
}
template <>
-inline float32x4x2_t convolve_3x3<2>(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, int fixed_point_position)
+inline float32x4x2_t convolve_3x3<2>(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2)
{
- float32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, fixed_point_position);
+ float32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2);
out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 2), out.val[0], 1);
out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 0), out.val[0], 2);
out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 2), out.val[0], 3);
@@ -118,9 +116,9 @@ inline float32x4x2_t convolve_3x3<2>(const float *in_top, const float *in_mid, c
}
template <>
-inline float32x4x2_t convolve_3x3<3>(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, int fixed_point_position)
+inline float32x4x2_t convolve_3x3<3>(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2)
{
- float32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, fixed_point_position);
+ float32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2);
out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 3), out.val[0], 1);
return out;
}
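The stride-2 and stride-3 specializations above keep their original structure after the signature change: compute a full stride-1 result with convolve_3x3<1>, then pick every 2nd (or 3rd) lane with the vgetq_lane/vsetq_lane shuffles. A rough scalar sketch of that compaction, for illustration only:

```cpp
#include <array>

// Illustration only: what the lane shuffles above achieve. From 8 consecutive
// stride-1 outputs, keep every `stride`-th value (stride 3 fills only the
// lanes that exist, as the intrinsic version does).
template <unsigned int stride>
std::array<float, 4> compact_stride(const std::array<float, 8> &stride1_out)
{
    std::array<float, 4> out{};
    for(unsigned int i = 0; i < out.size() && i * stride < stride1_out.size(); ++i)
    {
        out[i] = stride1_out[i * stride];
    }
    return out;
}
```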
diff --git a/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h b/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
index 908fa13876..d56fd44700 100644
--- a/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
+++ b/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
@@ -55,29 +55,6 @@ inline float32x4x3_t load_matrix_row(const float *ptr, int weights_offset = 0)
return r;
}
-/** Loads a 3x3 matrix as a row (qint8_t).
- *
- * @param[in] ptr Pointer to a qint8 3x3 matrix.
- * @param[in] weights_offset (Optional) Weights quantization offset.
- *
- * @return The loaded matrix.
- */
-inline qint8x8x3_t load_matrix_row(const qint8_t *ptr, int weights_offset = 0)
-{
- ARM_COMPUTE_UNUSED(weights_offset);
- /* ptr is a pointer to a row in a 3x3 matrix, the function returns 3 vectors holding exactly the same value in all lanes:
- r.val[0] contains the first element, r.val[1] the second element and r.val[2] the third element (in all lanes) */
- const qint8x8x3_t r =
- {
- {
- vld1_dup_qs8(ptr),
- vld1_dup_qs8(1 + ptr),
- vld1_dup_qs8(2 + ptr)
- }
- };
- return r;
-}
-
/** Loads a 3x3 matrix as a row (uint8_t).
*
* @param[in] ptr Pointer to a uint8_t 3x3 matrix.
@@ -104,27 +81,25 @@ inline int32x4x3_t load_matrix_row(const uint8_t *ptr, int weights_offset = 0)
/** Perform a convolve3x3 on float32.
*
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] fixed_point_position (Optional) Fixed point position.
- * @param[in] input_offset (Optional) Input quantization offset.
+ * @param[in] in_top Pointer to the first row of the input.
+ * @param[in] in_mid Pointer to the second row of the input.
+ * @param[in] in_low Pointer to the third row of the input.
+ * @param[in] m0 First row of the filter.
+ * @param[in] m1 Second row of the filter.
+ * @param[in] m2 Third row of the filter.
+ * @param[in] input_offset (Optional) Input quantization offset.
*
*/
template <unsigned int stridex>
float32x4x2_t convolve_3x3(const float *in_top, const float *in_mid, const float *in_low,
const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2,
- int fixed_point_position, int input_offset = 0);
+ int input_offset = 0);
template <>
inline float32x4x2_t convolve_3x3<1>(const float *in_top, const float *in_mid, const float *in_low,
const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2,
- int fixed_point_position, int input_offset)
+ int input_offset)
{
- ARM_COMPUTE_UNUSED(fixed_point_position);
ARM_COMPUTE_UNUSED(input_offset);
const float32x4x3_t vtop =
@@ -185,11 +160,11 @@ inline float32x4x2_t convolve_3x3<1>(const float *in_top, const float *in_mid, c
template <>
inline float32x4x2_t convolve_3x3<2>(const float *in_top, const float *in_mid, const float *in_low,
const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2,
- int fixed_point_position, int input_offset)
+ int input_offset)
{
ARM_COMPUTE_UNUSED(input_offset);
- float32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, fixed_point_position, input_offset);
+ float32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, input_offset);
out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 2), out.val[0], 1);
out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 0), out.val[0], 2);
out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 2), out.val[0], 3);
@@ -199,145 +174,35 @@ inline float32x4x2_t convolve_3x3<2>(const float *in_top, const float *in_mid, c
template <>
inline float32x4x2_t convolve_3x3<3>(const float *in_top, const float *in_mid, const float *in_low,
const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2,
- int fixed_point_position, int input_offset)
+ int input_offset)
{
ARM_COMPUTE_UNUSED(input_offset);
- float32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, fixed_point_position, input_offset);
+ float32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, input_offset);
out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 3), out.val[0], 1);
return out;
}
-/** Perform a convolve3x3 on qint16.
- *
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] fixed_point_position (Optional) Fixed point position.
- * @param[in] input_offset (Optional) Input quantization offset.
- *
- */
-template <unsigned int stridex>
-qint16x8x2_t convolve_3x3(const qint8_t *in_top, const qint8_t *in_mid, const qint8_t *in_low,
- const qint8x8x3_t &m0, const qint8x8x3_t &m1, const qint8x8x3_t &m2,
- int fixed_point_position, int input_offset = 0);
-
-template <>
-inline qint16x8x2_t convolve_3x3<1>(const qint8_t *in_top, const qint8_t *in_mid, const qint8_t *in_low,
- const qint8x8x3_t &m0, const qint8x8x3_t &m1, const qint8x8x3_t &m2,
- int fixed_point_position, int input_offset)
-{
- ARM_COMPUTE_UNUSED(fixed_point_position);
- ARM_COMPUTE_UNUSED(input_offset);
-
- const qint8x8x3_t vtop =
- {
- {
- vld1_qs8(in_top),
- vld1_qs8(in_top + 8),
- vld1_qs8(in_top + 16)
- }
- };
- const qint8x8x3_t vmid =
- {
- {
- vld1_qs8(in_mid),
- vld1_qs8(in_mid + 8),
- vld1_qs8(in_mid + 16)
- }
- };
- const qint8x8x3_t vlow =
- {
- {
- vld1_qs8(in_low),
- vld1_qs8(in_low + 8),
- vld1_qs8(in_low + 16)
- }
- };
- qint16x8x2_t out =
- {
- {
- vmull_qs8(vtop.val[0], m0.val[0], fixed_point_position),
- vmull_qs8(vtop.val[1], m0.val[0], fixed_point_position)
- }
- };
- out.val[0] = vqmlal_qs8(out.val[0], vext_s8(vtop.val[0], vtop.val[1], 1), m0.val[1], fixed_point_position);
- out.val[0] = vqmlal_qs8(out.val[0], vext_s8(vtop.val[0], vtop.val[1], 2), m0.val[2], fixed_point_position);
- out.val[0] = vqmlal_qs8(out.val[0], vmid.val[0], m1.val[0], fixed_point_position);
- out.val[0] = vqmlal_qs8(out.val[0], vext_s8(vmid.val[0], vmid.val[1], 1), m1.val[1], fixed_point_position);
- out.val[0] = vqmlal_qs8(out.val[0], vext_s8(vmid.val[0], vmid.val[1], 2), m1.val[2], fixed_point_position);
- out.val[0] = vqmlal_qs8(out.val[0], vlow.val[0], m2.val[0], fixed_point_position);
- out.val[0] = vqmlal_qs8(out.val[0], vext_s8(vlow.val[0], vlow.val[1], 1), m2.val[1], fixed_point_position);
- out.val[0] = vqmlal_qs8(out.val[0], vext_s8(vlow.val[0], vlow.val[1], 2), m2.val[2], fixed_point_position);
- out.val[1] = vqmlal_qs8(out.val[1], vext_s8(vtop.val[1], vtop.val[2], 1), m0.val[1], fixed_point_position);
- out.val[1] = vqmlal_qs8(out.val[1], vext_s8(vtop.val[1], vtop.val[2], 2), m0.val[2], fixed_point_position);
- out.val[1] = vqmlal_qs8(out.val[1], vmid.val[1], m1.val[0], fixed_point_position);
- out.val[1] = vqmlal_qs8(out.val[1], vext_s8(vmid.val[1], vmid.val[2], 1), m1.val[1], fixed_point_position);
- out.val[1] = vqmlal_qs8(out.val[1], vext_s8(vmid.val[1], vmid.val[2], 2), m1.val[2], fixed_point_position);
- out.val[1] = vqmlal_qs8(out.val[1], vlow.val[1], m2.val[0], fixed_point_position);
- out.val[1] = vqmlal_qs8(out.val[1], vext_s8(vlow.val[1], vlow.val[2], 1), m2.val[1], fixed_point_position);
- out.val[1] = vqmlal_qs8(out.val[1], vext_s8(vlow.val[1], vlow.val[2], 2), m2.val[2], fixed_point_position);
- return out;
-}
-
-template <>
-inline qint16x8x2_t convolve_3x3<2>(const qint8_t *in_top, const qint8_t *in_mid, const qint8_t *in_low,
- const qint8x8x3_t &m0, const qint8x8x3_t &m1, const qint8x8x3_t &m2,
- int fixed_point_position, int input_offset)
-{
- ARM_COMPUTE_UNUSED(input_offset);
-
- qint16x8x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, fixed_point_position, input_offset);
- out.val[0] = vsetq_lane_s16(vgetq_lane_s16(out.val[0], 2), out.val[0], 1);
- out.val[0] = vsetq_lane_s16(vgetq_lane_s16(out.val[0], 4), out.val[0], 2);
- out.val[0] = vsetq_lane_s16(vgetq_lane_s16(out.val[0], 6), out.val[0], 3);
- out.val[0] = vsetq_lane_s16(vgetq_lane_s16(out.val[1], 0), out.val[0], 4);
- out.val[0] = vsetq_lane_s16(vgetq_lane_s16(out.val[1], 2), out.val[0], 5);
- out.val[0] = vsetq_lane_s16(vgetq_lane_s16(out.val[1], 4), out.val[0], 6);
- out.val[0] = vsetq_lane_s16(vgetq_lane_s16(out.val[1], 6), out.val[0], 7);
- return out;
-}
-
-template <>
-inline qint16x8x2_t convolve_3x3<3>(const qint8_t *in_top, const qint8_t *in_mid, const qint8_t *in_low,
- const qint8x8x3_t &m0, const qint8x8x3_t &m1, const qint8x8x3_t &m2,
- int fixed_point_position, int input_offset)
-{
- ARM_COMPUTE_UNUSED(input_offset);
-
- qint16x8x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, fixed_point_position, input_offset);
- out.val[0] = vsetq_lane_s16(vgetq_lane_s16(out.val[0], 3), out.val[0], 1);
- out.val[0] = vsetq_lane_s16(vgetq_lane_s16(out.val[0], 6), out.val[0], 2);
- out.val[0] = vsetq_lane_s16(vgetq_lane_s16(out.val[1], 1), out.val[0], 3);
- return out;
-}
-
/** Perform a convolve3x3 on uint8_t
*
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] fixed_point_position (Optional) Fixed point position.
- * @param[in] input_offset (Optional) Input quantization offset.
+ * @param[in] in_top Pointer to the first row of the input.
+ * @param[in] in_mid Pointer to the second row of the input.
+ * @param[in] in_low Pointer to the third row of the input.
+ * @param[in] m0 First row of the filter.
+ * @param[in] m1 Second row of the filter.
+ * @param[in] m2 Third row of the filter.
+ * @param[in] input_offset (Optional) Input quantization offset.
*
*/
template <unsigned int stridex>
int32x4x2_t convolve_3x3(const uint8_t *in_top, const uint8_t *in_mid, const uint8_t *in_low,
const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2,
- int fixed_point_position, int input_offset);
+ int input_offset);
template <>
inline int32x4x2_t convolve_3x3<1>(const uint8_t *in_top, const uint8_t *in_mid, const uint8_t *in_low, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2,
- int fixed_point_position, int input_offset)
+ int input_offset)
{
- ARM_COMPUTE_UNUSED(fixed_point_position);
-
const int32x4_t v_input_offset = vdupq_n_s32(input_offset);
const uint8x8x2_t vtop =
@@ -427,11 +292,9 @@ inline int32x4x2_t convolve_3x3<1>(const uint8_t *in_top, const uint8_t *in_mid,
template <>
inline int32x4x2_t convolve_3x3<2>(const uint8_t *in_top, const uint8_t *in_mid, const uint8_t *in_low,
const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2,
- int fixed_point_position, int input_offset)
+ int input_offset)
{
- ARM_COMPUTE_UNUSED(fixed_point_position);
-
- int32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, fixed_point_position, input_offset);
+ int32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, input_offset);
out.val[0] = vsetq_lane_s32(vgetq_lane_s32(out.val[0], 2), out.val[0], 1);
out.val[0] = vsetq_lane_s32(vgetq_lane_s32(out.val[1], 0), out.val[0], 2);
out.val[0] = vsetq_lane_s32(vgetq_lane_s32(out.val[1], 2), out.val[0], 3);
@@ -441,10 +304,9 @@ inline int32x4x2_t convolve_3x3<2>(const uint8_t *in_top, const uint8_t *in_mid,
template <>
inline int32x4x2_t convolve_3x3<3>(const uint8_t *in_top, const uint8_t *in_mid, const uint8_t *in_low,
const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2,
- int fixed_point_position, int input_offset)
+ int input_offset)
{
- ARM_COMPUTE_UNUSED(fixed_point_position);
- int32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, fixed_point_position, input_offset);
+ int32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, input_offset);
out.val[0] = vsetq_lane_s32(vgetq_lane_s32(out.val[0], 3), out.val[0], 1);
return out;
}
@@ -477,34 +339,6 @@ inline void store_results<3>(float *buffer, const float32x4x2_t &values)
vst1_f32(buffer, vget_low_f32(values.val[0]));
}
-/** Stores a qint16_t array into a memory location.
- *
- * @param[in] buffer Pointer to the memory location where the values will be stored.
- * @param[in] values Values that will be stored.
- *
- */
-template <unsigned int stridex>
-void store_results(qint16_t *buffer, const qint16x8x2_t &values);
-
-template <>
-inline void store_results<1>(qint16_t *buffer, const qint16x8x2_t &values)
-{
- vst1q_qs16(buffer, values.val[0]);
- vst1q_qs16(buffer + 8, values.val[1]);
-}
-
-template <>
-inline void store_results<2>(qint16_t *buffer, const qint16x8x2_t &values)
-{
- vst1q_qs16(buffer, values.val[0]);
-}
-
-template <>
-inline void store_results<3>(qint16_t *buffer, const qint16x8x2_t &values)
-{
- vst1_qs16(buffer, vget_low_s16(values.val[0]));
-}
-
/** Stores a uint32_t array into a memory location.
*
* @param[in] buffer Pointer to the memory location where the values will be stored.
@@ -557,25 +391,20 @@ inline float16x8x3_t load_matrix_row(const float16_t *ptr)
/** Perform a convolve3x3 on float16.
*
- * @param[in] in_top Pointer to the first row of the input.
- * @param[in] in_mid Pointer to the second row of the input.
- * @param[in] in_low Pointer to the third row of the input.
- * @param[in] m0 First row of the filter.
- * @param[in] m1 Second row of the filter.
- * @param[in] m2 Third row of the filter.
- * @param[in] fixed_point_position (Optional) Fixed point position.
+ * @param[in] in_top Pointer to the first row of the input.
+ * @param[in] in_mid Pointer to the second row of the input.
+ * @param[in] in_low Pointer to the third row of the input.
+ * @param[in] m0 First row of the filter.
+ * @param[in] m1 Second row of the filter.
+ * @param[in] m2 Third row of the filter.
*
*/
template <unsigned int stridex>
-float16x8x2_t convolve_3x3(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low, const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2,
- int fixed_point_position);
+float16x8x2_t convolve_3x3(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low, const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2);
template <>
-inline float16x8x2_t convolve_3x3<1>(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low, const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2,
- int fixed_point_position)
+inline float16x8x2_t convolve_3x3<1>(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low, const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2)
{
- ARM_COMPUTE_UNUSED(fixed_point_position);
-
const float16x8x3_t vtop =
{
{
@@ -627,10 +456,9 @@ inline float16x8x2_t convolve_3x3<1>(const float16_t *in_top, const float16_t *i
}
template <>
-inline float16x8x2_t convolve_3x3<2>(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low, const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2,
- int fixed_point_position)
+inline float16x8x2_t convolve_3x3<2>(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low, const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2)
{
- float16x8x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, fixed_point_position);
+ float16x8x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2);
out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 2), out.val[0], 1);
out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 0), out.val[0], 2);
out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 2), out.val[0], 3);
@@ -638,10 +466,9 @@ inline float16x8x2_t convolve_3x3<2>(const float16_t *in_top, const float16_t *i
}
template <>
-inline float16x8x2_t convolve_3x3<3>(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low, const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2,
- int fixed_point_position)
+inline float16x8x2_t convolve_3x3<3>(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low, const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2)
{
- float16x8x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2, fixed_point_position);
+ float16x8x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2);
out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 3), out.val[0], 1);
return out;
}
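For reference, a minimal call-site sketch of the updated FP32 detail helpers now that the fixed_point_position parameter is gone. This is not part of the patch; the function name, the row-major 3x3 weights layout and the pointer arguments are illustrative assumptions.

    #include "arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h"
    using namespace arm_compute::detail;

    void convolve_row_example(const float *in_top, const float *in_mid, const float *in_low,
                              const float *weights, float *out_ptr)
    {
        // Each call broadcasts one 3-element filter row across all vector lanes.
        const float32x4x3_t w0 = load_matrix_row(weights);
        const float32x4x3_t w1 = load_matrix_row(weights + 3);
        const float32x4x3_t w2 = load_matrix_row(weights + 6);
        // input_offset keeps its default of 0; there is no fixed_point_position argument any more.
        const float32x4x2_t acc = convolve_3x3<1>(in_top, in_mid, in_low, w0, w1, w2);
        store_results<1>(out_ptr, acc); // stride 1: writes 8 contiguous results
    }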
diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h
index 882e4ec1d0..681e27033e 100644
--- a/arm_compute/core/SubTensorInfo.h
+++ b/arm_compute/core/SubTensorInfo.h
@@ -98,12 +98,6 @@ public:
_parent->set_format(format);
return *this;
};
- ITensorInfo &set_fixed_point_position(int fixed_point_position) override
- {
- ARM_COMPUTE_ERROR_ON(_parent == nullptr);
- _parent->set_fixed_point_position(fixed_point_position);
- return *this;
- };
ITensorInfo &set_tensor_shape(const TensorShape &shape) override;
ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override
{
@@ -143,11 +137,6 @@ public:
return _parent->offset_element_in_bytes(_coords);
}
size_t offset_element_in_bytes(const Coordinates &pos) const override;
- int fixed_point_position() const override
- {
- ARM_COMPUTE_ERROR_ON(_parent == nullptr);
- return _parent->fixed_point_position();
- }
size_t element_size() const override
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h
index f8cfb35357..1eaf052d8e 100644
--- a/arm_compute/core/TensorInfo.h
+++ b/arm_compute/core/TensorInfo.h
@@ -86,20 +86,18 @@ public:
*
* Can be used for automatic derivation of the shape by the function.
*
- * @param[in] num_channels It indicates the number of channels for each tensor element
- * @param[in] data_type Data type to use for each tensor element
- * @param[in] fixed_point_position (Optional) It specifies the fixed point position when the tensor data type is QS8, QS16 or QS32.
+ * @param[in] num_channels It indicates the number of channels for each tensor element
+ * @param[in] data_type Data type to use for each tensor element
*/
- TensorInfo(size_t num_channels, DataType data_type, size_t fixed_point_position = 0);
+ TensorInfo(size_t num_channels, DataType data_type);
/** Constructor
*
- * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements.
- * @param[in] num_channels It indicates the number of channels for each tensor element
- * @param[in] data_type Data type to use for each tensor element
- * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16.
+ * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements.
+ * @param[in] num_channels It indicates the number of channels for each tensor element
+ * @param[in] data_type Data type to use for each tensor element
*/
- TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0);
+ TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type);
/** Constructor
*
@@ -146,20 +144,18 @@ public:
*
* Can be used for automatic derivation of the shape by the function.
*
- * @param[in] num_channels Desired number of channels for each tensor element.
- * @param[in] data_type Data type to use for each tensor element.
- * @param[in] fixed_point_position (Optional) Fixed point position when the tensor data type is QS8, QS16 or QS32.
+ * @param[in] num_channels Desired number of channels for each tensor element.
+ * @param[in] data_type Data type to use for each tensor element.
*/
- void init(size_t num_channels, DataType data_type, size_t fixed_point_position = 0);
+ void init(size_t num_channels, DataType data_type);
/** Initialize the metadata structure with the given parameters
*
- * @param[in] tensor_shape Size for each dimension of the tensor in number of elements.
- * @param[in] num_channels Desired number of channels for each tensor element.
- * @param[in] data_type Data type to use for each tensor element.
- * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16.
+ * @param[in] tensor_shape Size for each dimension of the tensor in number of elements.
+ * @param[in] num_channels Desired number of channels for each tensor element.
+ * @param[in] data_type Data type to use for each tensor element.
*/
- void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0);
+ void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type);
/** Initialize the metadata structure with the given parameters
*
@@ -169,10 +165,9 @@ public:
* @param[in] strides_in_bytes Stride in bytes for accessing each dimension of the tensor.
* @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element.
* @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element).
- * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16.
*/
void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes,
- size_t total_size_in_bytes, int fixed_point_position = 0);
+ size_t total_size_in_bytes);
/** Initialize the metadata structure for the given HOG's metadata
*
* @param[in] hog_info HOG's metadata used to allocate normalized HOG space
@@ -190,19 +185,18 @@ public:
* @return Total allocation size including padding in bytes.
*/
size_t init_auto_padding(const TensorShape &tensor_shape, Format format);
- /** Initialize the metadata structure for the given tensor shape, number of channels,
- * data type and fixed point position. (Padding is automatically calculated)
+ /** Initialize the metadata structure for the given tensor shape, number of channels and
+ * data type. (Padding is automatically calculated)
*
* @note The padding used by this method is really conservative so that the tensor can be used for most functions.
*
- * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements
- * @param[in] num_channels It indicates the number of channels for each tensor element
- * @param[in] data_type Data type to use for each tensor element
- * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16.
+ * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements
+ * @param[in] num_channels It indicates the number of channels for each tensor element
+ * @param[in] data_type Data type to use for each tensor element
*
* @return Total allocation size including padding in bytes.
*/
- size_t init_auto_padding(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0);
+ size_t init_auto_padding(const TensorShape &tensor_shape, size_t num_channels, DataType data_type);
/** Initialize the metadata structure for the given HOG's metadata
*
* @note init_auto_padding will be used for the tensor initialization.
@@ -221,7 +215,6 @@ public:
ITensorInfo &set_num_channels(int num_channels) override;
ITensorInfo &set_format(Format format) override;
ITensorInfo &set_tensor_shape(const TensorShape &shape) override;
- ITensorInfo &set_fixed_point_position(int fixed_point_position) override;
ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override;
ITensorInfo &set_data_layout(const DataLayout &data_layout) override;
ITensorInfo &reset_padding() override;
@@ -244,10 +237,6 @@ public:
return _offset_first_element_in_bytes;
}
size_t offset_element_in_bytes(const Coordinates &pos) const override;
- int fixed_point_position() const override
- {
- return _fixed_point_position;
- }
size_t element_size() const override
{
return data_size_from_type(_data_type) * _num_channels;
@@ -318,7 +307,6 @@ private:
std::tuple<Strides, size_t, size_t> calculate_padding_requirements(const PaddingSize &padding);
size_t _total_size;
- int _fixed_point_position;
size_t _offset_first_element_in_bytes;
Strides _strides_in_bytes;
size_t _num_channels;
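With the _fixed_point_position member removed, tensor metadata is built from shape, channel count and data type only, and quantized tensors carry a QuantizationInfo instead. A minimal usage sketch of the 3-argument constructor shown above (shape, scale and offset values are illustrative):

    #include "arm_compute/core/TensorInfo.h"
    using namespace arm_compute;

    void tensor_info_example()
    {
        TensorInfo info(TensorShape(224U, 224U, 3U), 1 /* num_channels */, DataType::QASYMM8);
        // Quantization parameters replace the old fixed_point_position argument.
        info.set_quantization_info(QuantizationInfo(0.05f, 128));
    }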
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index da28e131de..89fd4b8bb4 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -74,11 +74,9 @@ enum class DataType
UNKNOWN, /**< Unknown data type */
U8, /**< unsigned 8-bit number */
S8, /**< signed 8-bit number */
- QS8, /**< quantized, symmetric fixed-point 8-bit number */
QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number */
U16, /**< unsigned 16-bit number */
S16, /**< signed 16-bit number */
- QS16, /**< quantized, symmetric fixed-point 16-bit number */
U32, /**< unsigned 32-bit number */
S32, /**< signed 32-bit number */
QS32, /**< quantized, symmetric fixed-point 32-bit number */
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 060d5904d4..cfebfa1506 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -110,13 +110,11 @@ inline size_t data_size_from_type(DataType data_type)
{
case DataType::U8:
case DataType::S8:
- case DataType::QS8:
case DataType::QASYMM8:
return 1;
case DataType::U16:
case DataType::S16:
case DataType::F16:
- case DataType::QS16:
return 2;
case DataType::F32:
case DataType::U32:
@@ -185,12 +183,10 @@ inline size_t element_size_from_data_type(DataType dt)
{
case DataType::S8:
case DataType::U8:
- case DataType::QS8:
case DataType::QASYMM8:
return 1;
case DataType::U16:
case DataType::S16:
- case DataType::QS16:
case DataType::F16:
return 2;
case DataType::U32:
@@ -522,14 +518,10 @@ inline DataType get_promoted_data_type(DataType dt)
return DataType::U16;
case DataType::S8:
return DataType::S16;
- case DataType::QS8:
- return DataType::QS16;
case DataType::U16:
return DataType::U32;
case DataType::S16:
return DataType::S32;
- case DataType::QS16:
- return DataType::QS32;
case DataType::QASYMM8:
case DataType::F16:
case DataType::U32:
@@ -1018,29 +1010,7 @@ inline bool is_data_type_quantized(DataType dt)
{
switch(dt)
{
- case DataType::QS8:
case DataType::QASYMM8:
- case DataType::QS16:
- case DataType::QS32:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of fixed point type
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of fixed point type, else false.
- */
-inline bool is_data_type_fixed_point(DataType dt)
-{
- switch(dt)
- {
- case DataType::QS8:
- case DataType::QS16:
- case DataType::QS32:
return true;
default:
return false;
diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h
index 4ef94f2c6d..1646ebe719 100644
--- a/arm_compute/core/Validate.h
+++ b/arm_compute/core/Validate.h
@@ -545,71 +545,6 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function,
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...) \
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
-/** Return an error if the passed tensor infos have different fixed point data types or different fixed point positions
- *
- * @note: If the first tensor doesn't have fixed point data type, the function returns without throwing an error
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file Name of the file where the error occurred.
- * @param[in] line Line on which the error occurred.
- * @param[in] tensor_info_1 The first tensor info to be compared.
- * @param[in] tensor_info_2 The second tensor info to be compared.
- * @param[in] tensor_infos (Optional) Further allowed tensor infos.
- *
- * @return Status
- */
-template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_fixed_point(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
-{
- DataType &&first_data_type = tensor_info_1->data_type();
- const int first_fixed_point_position = tensor_info_1->fixed_point_position();
-
- if(!is_data_type_fixed_point(first_data_type))
- {
- return arm_compute::Status{};
- }
-
- const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward<Ts>(tensor_infos)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info)
- {
- return tensor_info->data_type() != first_data_type;
- }),
- function, file, line, "Tensors have different fixed point data types");
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info)
- {
- return tensor_info->fixed_point_position() != first_fixed_point_position;
- }),
- function, file, line, "Tensors have different fixed point positions");
-
- return arm_compute::Status{};
-}
-/** Return an error if the passed tensor have different fixed point data types or different fixed point positions
- *
- * @note: If the first tensor doesn't have fixed point data type, the function returns without throwing an error
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file Name of the file where the error occurred.
- * @param[in] line Line on which the error occurred.
- * @param[in] tensor_1 The first tensor to be compared.
- * @param[in] tensor_2 The second tensor to be compared.
- * @param[in] tensors (Optional) Further allowed tensors.
- *
- * @return Status
- */
-template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_fixed_point(const char *function, const char *file, const int line,
- const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_fixed_point(function, file, line, tensor_1->info(), tensor_2->info(),
- detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
- return arm_compute::Status{};
-}
-#define ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_fixed_point(__func__, __FILE__, __LINE__, __VA_ARGS__))
-#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_fixed_point(__func__, __FILE__, __LINE__, __VA_ARGS__))
-
/** Return an error if the passed tensor infos have different asymmetric quantized data types or different quantization info
*
* @note: If the first tensor info doesn't have asymmetric quantized data type, the function returns without throwing an error
@@ -976,96 +911,5 @@ arm_compute::Status error_on_invalid_subtensor_valid_region(const char *function
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv))
#define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) \
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv))
-
-/** Return an error if the input fixed-point positions are different.
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file Name of the file where the error occurred.
- * @param[in] line Line on which the error occurred.
- * @param[in] tensor_info_1 The first tensor info to be compared.
- * @param[in] tensor_info_2 The second tensor info to be compared.
- * @param[in] tensor_infos (Optional) Further allowed tensor infos.
- *
- * @return Status
- */
-template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_fixed_point_position(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
-{
- const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_info_array{ { tensor_info_2, std::forward<Ts>(tensor_infos)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_info_array.begin(), tensor_info_array.end(), [&](const ITensorInfo * tensor_info)
- {
- return tensor_info->fixed_point_position() != tensor_info_1->fixed_point_position();
- }),
- function, file, line, "Tensors have different fixed-point positions");
- return arm_compute::Status{};
-}
-/** Return an error if the input fixed-point positions are different.
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file Name of the file where the error occurred.
- * @param[in] line Line on which the error occurred.
- * @param[in] tensor_1 The first tensor to be compared.
- * @param[in] tensor_2 The second tensor to be compared.
- * @param[in] tensors (Optional) Further allowed tensors.
- *
- * @return Status
- */
-template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_fixed_point_position(const char *function, const char *file, const int line,
- const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_fixed_point_position(function, file, line, tensor_1->info(), tensor_2->info(),
- detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
- return arm_compute::Status{};
-}
-#define ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_fixed_point_position(__func__, __FILE__, __LINE__, __VA_ARGS__))
-#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_fixed_point_position(__func__, __FILE__, __LINE__, __VA_ARGS__))
-
-/** Return an error if the fixed-point value is not representable in the specified Q format.
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file Name of the file where the error occurred.
- * @param[in] line Line on which the error occurred.
- * @param[in] value The floating point value to be checked.
- * @param[in] tensor_info Input tensor info that has information on data type and fixed-point position.
- *
- * @return Status
- */
-inline arm_compute::Status error_on_value_not_representable_in_fixed_point(const char *function, const char *file, int line,
- float value, const ITensorInfo *tensor_info)
-{
- const int fixed_point_position = tensor_info->fixed_point_position();
- const DataType dt = tensor_info->data_type();
- const unsigned int q_max_range = 0xFFFFFFFFu >> (((sizeof(unsigned int) - element_size_from_data_type(dt)) * 8) + 1);
- const float max_range = q_max_range / (static_cast<float>(1 << fixed_point_position));
-
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(value > max_range, function, file, line,
- "Value %f is not representable in %s with fixed-point position %d", value, string_from_data_type(dt).c_str(), fixed_point_position);
- return arm_compute::Status{};
-}
-/** Return an error an error if the fixed-point value is not representable in the specified Q format.
- *
- * @param[in] function Function in which the error occurred.
- * @param[in] file Name of the file where the error occurred.
- * @param[in] line Line on which the error occurred.
- * @param[in] value The floating point value to be checked.
- * @param[in] tensor Input tensor that has information on data type and fixed-point position.
- *
- * @return Status
- */
-inline arm_compute::Status error_on_value_not_representable_in_fixed_point(const char *function, const char *file, int line,
- float value, const ITensor *tensor)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_value_not_representable_in_fixed_point(function, file, line, value, tensor->info()));
- return arm_compute::Status{};
-}
-#define ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_value_not_representable_in_fixed_point(__func__, __FILE__, __LINE__, __VA_ARGS__))
-#define ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_value_not_representable_in_fixed_point(__func__, __FILE__, __LINE__, __VA_ARGS__))
}
#endif /* __ARM_COMPUTE_VALIDATE_H__*/
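With the fixed-point validation helpers removed, kernel validate() implementations fall back on the remaining macros such as ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES. A hedged sketch of a typical body; the function name is a placeholder and ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR is assumed from elsewhere in Validate.h, not taken from this patch:

    inline arm_compute::Status validate_example(const arm_compute::ITensorInfo *input,
                                                const arm_compute::ITensorInfo *output)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);                // assumed existing macro
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); // shown above
        return arm_compute::Status{};
    }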
diff --git a/arm_compute/graph/TypePrinter.h b/arm_compute/graph/TypePrinter.h
index 1847d9c226..177a5e2f38 100644
--- a/arm_compute/graph/TypePrinter.h
+++ b/arm_compute/graph/TypePrinter.h
@@ -68,9 +68,6 @@ inline ::std::ostream &operator<<(::std::ostream &os, const DataType &data_type)
case DataType::U8:
os << "U8";
break;
- case DataType::QS8:
- os << "QS8";
- break;
case DataType::QASYMM8:
os << "QASYMM8";
break;
@@ -83,9 +80,6 @@ inline ::std::ostream &operator<<(::std::ostream &os, const DataType &data_type)
case DataType::S16:
os << "S16";
break;
- case DataType::QS16:
- os << "QS16";
- break;
case DataType::U32:
os << "U32";
break;
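The stream operator now covers only the remaining DataType values. A short usage sketch, assuming the overload is found through the arm_compute namespace as defined in this header:

    #include <iostream>
    #include "arm_compute/graph/TypePrinter.h"

    void print_data_type_example()
    {
        std::cout << arm_compute::DataType::QASYMM8 << std::endl; // prints "QASYMM8"
        std::cout << arm_compute::DataType::S16 << std::endl;     // prints "S16"
    }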
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
index 7100eb4d84..e98fa4bf48 100644
--- a/arm_compute/runtime/CL/functions/CLActivationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h
@@ -44,7 +44,7 @@ public:
* @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * of the activation function. Data types supported: QASYMM8/F16/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] act_info Activation layer parameters.
*/
@@ -52,7 +52,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayer
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * of the activation function. Data types supported: QASYMM8/F16/F32.
* @param[in] output Destination tensor info. Data type supported: same as @p input
* @param[in] act_info Activation layer information.
*
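A minimal in-place usage sketch with the reduced type list; the tensor is assumed to be an allocated QASYMM8/F16/F32 CLTensor and ReLU is an arbitrary choice of activation:

    #include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
    using namespace arm_compute;

    void relu_in_place_example(ICLTensor *tensor)
    {
        CLActivationLayer act;
        // Passing nullptr as output runs the activation in-place, as noted above.
        act.configure(tensor, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    }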
diff --git a/arm_compute/runtime/CL/functions/CLArithmeticAddition.h b/arm_compute/runtime/CL/functions/CLArithmeticAddition.h
index 9eea846d97..5aba60ad01 100644
--- a/arm_compute/runtime/CL/functions/CLArithmeticAddition.h
+++ b/arm_compute/runtime/CL/functions/CLArithmeticAddition.h
@@ -33,7 +33,7 @@ class ICLTensor;
/** Basic function to run @ref CLArithmeticAdditionKernel
*
- * @note The tensor data type for the inputs must be U8/QS8/QS16/S16/F16/F32.
+ * @note The tensor data type for the inputs must be U8/S16/F16/F32.
* @note The function performs an arithmetic addition between two tensors.
*/
class CLArithmeticAddition : public ICLSimpleFunction
@@ -41,19 +41,19 @@ class CLArithmeticAddition : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input. Data types supported: U8/QS8/QASYMM8/QS16/S16/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QS8 (only if @p input1 is QS8), QASYMM8 (only if @p input1 is QASYMM8), QS16 (only if @p input1 is QS16), S16/F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QASYMM8 (only if both inputs are QASYMM8), QS16 (only if both inputs are QS16), S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
* @param[in] policy Policy to use to handle overflow.
*/
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticAddition
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QS8/QASYMM8/QS16/S16/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8/QS8 (only if @p input1 is QS8), QASYMM8 (only if @p input1 is QASYMM8), QS16 (only if @p input1 is QS16), S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QASYMM8 ( only if both inputs are QASYMM8), QS16 (only if both inputs are QS16), S16/F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+ * @param[in]  output  Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
* @param[in] policy Policy to use to handle overflow.
*
* @return a status
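A hedged sketch of checking an addition against the updated type list before configuring; the tensor-info arguments are placeholders and SATURATE is an arbitrary overflow policy:

    #include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h"
    using namespace arm_compute;

    Status check_addition(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *dst)
    {
        // QS8/QS16 inputs are no longer accepted; U8/QASYMM8/S16/F16/F32 remain.
        return CLArithmeticAddition::validate(a, b, dst, ConvertPolicy::SATURATE);
    }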
diff --git a/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h
index 0d3f5bce6a..b9690806d7 100644
--- a/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h
+++ b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ class ICLTensor;
/** Basic function to run @ref CLArithmeticSubtractionKernel
*
- * @note The tensor data type for the inputs must be U8/QS8/QS16/S16/F16/F32.
+ * @note The tensor data type for the inputs must be U8/S16/F16/F32.
* @note The function performs an arithmetic subtraction between two tensors.
*/
class CLArithmeticSubtraction : public ICLSimpleFunction
@@ -42,17 +42,17 @@ class CLArithmeticSubtraction : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in] input1 First tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32.
- * @param[in] input2 Second tensor input. Data types supported: U8/QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16/F32.
+ * @param[in] input1 First tensor input. Data types supported: U8/S16/F16/F32.
+ * @param[in] input2 Second tensor input. Data types supported: U8/S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16/F32.
* @param[in] policy Policy to use to handle overflow.
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticSubtraction
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QS8/QS16/S16/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8/QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/S16/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: U8/S16/F16/F32.
+ * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), S16/F16/F32.
* @param[in] policy Policy to use to handle overflow.
*
* @return a status
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
index 9386a86ae5..215046ae7e 100644
--- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -50,7 +50,7 @@ public:
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
* 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+ * The rest are optional and used for representing batches. Data types supported: F16/F32.
* @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
* @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
@@ -65,7 +65,7 @@ public:
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
* 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+ * The rest are optional and used for representing batches. Data types supported: F16/F32.
* @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
* @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
diff --git a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
index b632e3b2e3..0bcbfd6276 100644
--- a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
+++ b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
@@ -41,14 +41,14 @@ class CLChannelShuffleLayer : public ICLSimpleFunction
public:
/** Initialize the function
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
*/
void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
/** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
*
diff --git a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
index d8eef34c62..ae0c9d6459 100644
--- a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
@@ -37,7 +37,7 @@ class CLConvertFullyConnectedWeights : public ICLSimpleFunction
public:
/** Initialize the function.
*
- * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/QS32/F16/F32.
+ * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/QS32/F16/F32.
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). Must be in NCHW format.
* @param[in] data_layout The data layout the weights have been trained in.
@@ -45,7 +45,7 @@ public:
void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
/** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeights
*
- * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/QS32/F16/F32.
+ * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/QS32/F16/F32.
* @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). Must be in NCHW format.
* @param[in] data_layout The data layout the weights have been trained in.
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 82bb71ca6c..c9a74f2a4c 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -49,7 +49,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
@@ -68,7 +68,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
@@ -88,7 +88,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
diff --git a/arm_compute/runtime/CL/functions/CLCopy.h b/arm_compute/runtime/CL/functions/CLCopy.h
index d76f0702af..170dc9a613 100644
--- a/arm_compute/runtime/CL/functions/CLCopy.h
+++ b/arm_compute/runtime/CL/functions/CLCopy.h
@@ -38,7 +38,7 @@ class CLCopy : public ICLSimpleFunction
public:
/** Initialise the function's source and destination.
*
- * @param[in] input Source tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32.
+ * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
* @param[out] output Output tensor. Data types supported: Same as @p input.
*
*/
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
index 00b3b66c97..d505814e73 100644
--- a/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,7 +52,7 @@ public:
CLDepthConcatenateLayer();
/** Initialise the kernel's inputs vector and output.
*
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32.
+ * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F16/F32.
* @param[out] output Output tensor. Data types supported: Same as @p input.
*/
void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
index c84dc15508..00fa0a686d 100644
--- a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,20 +43,16 @@ public:
*
* Valid conversions Input -> Output :
*
- * - QS8 -> F32
- * - QS16 -> F32
* - U8 -> U16, S16, U32, S32
* - U16 -> U8, U32, S32
* - S16 -> U8, U32, S32
* - U32 -> U8, U16, S16
* - S32 -> U8, U16, S16
- * - F32 -> QS8, QS16
*
- * @param[in] input The input tensor to convert. Data types supported: QS8/U8/U16/S16/Q16/U32/S32/F32.
- * @param[out] output The output tensor. Data types supported: QS8/U8/U16/S16/QS16/U32/S32/F32.
+ * @param[in]  input  The input tensor to convert. Data types supported: U8/U16/S16/U32/S32/F32.
+ * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F32.
* @param[in] policy Conversion policy.
* @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
- * It is not used on fixed point conversion.
*/
void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
};
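A sketch of one of the remaining valid conversions (U8 -> S16 with a 1-bit up-shift); the tensors are assumed to be configured and allocated elsewhere:

    #include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
    using namespace arm_compute;

    void convert_u8_to_s16_example(const ICLTensor *src_u8, ICLTensor *dst_s16)
    {
        CLDepthConvertLayer convert;
        convert.configure(src_u8, dst_s16, ConvertPolicy::SATURATE, 1 /* shift */);
    }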
diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index 4363224a08..31683c51f9 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
@@ -47,7 +47,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QS8/QS16/F16/F32.
+ * Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
@@ -61,7 +61,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h
index 80a8cf20e3..f4515b6700 100644
--- a/arm_compute/runtime/CL/functions/CLFillBorder.h
+++ b/arm_compute/runtime/CL/functions/CLFillBorder.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,7 +38,7 @@ class CLFillBorder : public ICLSimpleFunction
public:
/** Initialize the function
*
- * @param[in,out] tensor Source tensor. Data types supported: QS8/U8/S16/QS16
+ * @param[in,out] tensor Source tensor. Data types supported: U8/S16
* @param[in] border_width The border width
* @param[in] border_mode Strategy to use for borders.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
diff --git a/arm_compute/runtime/CL/functions/CLFlattenLayer.h b/arm_compute/runtime/CL/functions/CLFlattenLayer.h
index e19b0e465f..88df4a7f96 100644
--- a/arm_compute/runtime/CL/functions/CLFlattenLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFlattenLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@ class CLFlattenLayer : public ICLSimpleFunction
public:
/** Initialise the kernel's input and output.
*
- * @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: QS8/QS16/F16/F32
+ * @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: F16/F32
* @param[out] output Output tensor with shape [w*h*d, input_batches] where:
* w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
*/
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 127d8acf10..3357868968 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -48,13 +48,13 @@ class CLFullyConnectedLayerReshapeWeights : public ICLSimpleFunction
public:
/** Set the input and output tensors.
*
- * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/F16/F32.
* @param[out] output Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input.
*/
void configure(const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayerReshapeWeights
*
- * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/F16/F32.
* @param[in] output Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input.
*
* @return a status
@@ -86,7 +86,7 @@ public:
CLFullyConnectedLayer &operator=(CLFullyConnectedLayer &&) = default;
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data type supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input
 * @param[in]  biases  Bias tensor. It can be nullptr. Data type supported: Same as @p input.
* @param[out] output Destination tensor. Data type supported: Same as @p input.
@@ -99,7 +99,7 @@ public:
bool retain_internal_weights = false);
/** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayer
*
- * @param[in] input Source tensor. Data type supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input
 * @param[in]  biases  Bias tensor. It can be nullptr. Data type supported: Same as @p input.
* @param[in] output Destination tensor. Data type supported: Same as @p input.
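
For reference, a minimal sketch of exercising the reduced data-type contract of CLFullyConnectedLayer on F32 data. The 128-to-16 layer size, the tensor shapes and the validate-before-configure pairing are illustrative assumptions, not something this patch prescribes.

    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/CL/CLFunctions.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32));          // flattened input
        weights.allocator()->init(TensorInfo(TensorShape(128U, 16U), 1, DataType::F32)); // 2D weights
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));

        // validate() mirrors configure() and rejects unsupported data-type combinations up front
        ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedLayer::validate(src.info(), weights.info(), biases.info(), dst.info()));

        CLFullyConnectedLayer fc;
        fc.configure(&src, &weights, &biases, &dst);

        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src/weights/biases ...
        fc.run();
        CLScheduler::get().sync();
        return 0;
    }
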
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 41c7467a3f..c4513f29d9 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -69,7 +69,7 @@ public:
*
* @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix
*
- * @param[in] a First input tensor (Matrix or Vector A). Data types supported: QS8/QS16/F16/F32
+ * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F16/F32
* @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a.
* @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
* @param[out] output Output tensor. Data type supported: same as @p a
@@ -82,7 +82,7 @@ public:
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMM.
*
- * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: QS8/QS16/F16/F32
+ * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: F16/F32
* @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a.
* @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
* @param[out] output Output tensor info. Data type supported: same as @p a
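
To make the alpha/beta semantics concrete, a hedged sketch of a plain F32 GEMM with the C operand omitted; the matrix sizes, alpha = 1 and beta = 0 are made-up example values.

    #include "arm_compute/runtime/CL/CLFunctions.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // A: 4x16, B: 16x8, output: 4x8 (TensorShape is width-first, i.e. columns x rows)
        CLTensor a, b, dst;
        a.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
        b.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));

        CLGEMM gemm;
        gemm.configure(&a, &b, nullptr, &dst, 1.0f, 0.0f); // c == nullptr: just alpha * A * B

        a.allocator()->allocate();
        b.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill a and b ...
        gemm.run();
        CLScheduler::get().sync();
        return 0;
    }
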
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index 2c1f7a9d5e..09daa5f568 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -59,7 +59,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data type supported: QASYMM8/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[out] output Destination tensor. Data types supported: Same as @p weights.
*/
@@ -67,7 +67,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayerReshapeWeights
*
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data type supported: QASYMM8/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[in] output Destination tensor. Data types supported: Same as @p weights.
*
@@ -116,7 +116,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
@@ -134,7 +134,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
@@ -158,7 +158,7 @@ public:
private:
/** Configures the appropriate matrix multiply routine
*
- * @param[in] input Input tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Data type supported: Same as @p input.
* @param[in, out] output Output tensor. Data types supported: Same as @p input,
* except for input of QASYMM8 type where output should be of S32 type.
@@ -167,7 +167,7 @@ private:
void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, int gemm_3d_depth = 1);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines
*
- * @param[in] input Input tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data types supported: Same as @p input,
* except for input of QASYMM8 type where output should be of S32 type.
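
Since the quantized path pairs QASYMM8 activations/weights with S32 biases, here is a rough sketch of that tensor setup; the shapes, the QuantizationInfo values and the assumption that the remaining configure parameters (weights info, dilation, fused activation) keep their defaults are all illustrative.

    #include "arm_compute/runtime/CL/CLFunctions.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        const QuantizationInfo qinfo(0.02f, 128); // example scale/offset
        CLTensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::QASYMM8, qinfo));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::QASYMM8, qinfo));
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32)); // S32 biases for QASYMM8 input
        dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::QASYMM8, qinfo));

        CLGEMMConvolutionLayer conv;
        conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1)); // 3x3 kernels, stride 1, same padding

        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src/weights/biases ...
        conv.run();
        CLScheduler::get().sync();
        return 0;
    }
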
diff --git a/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h b/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h
index ae05b0fd9c..48bb6ccb22 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ class CLGEMMInterleave4x4 : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output
*
- * @param[in] input First input tensor. Data types supported: U8/S8/QS8/QASYMM8/QS16/U16/S16/F16/U32/S32/F32
+ * @param[in] input First input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
diff --git a/arm_compute/runtime/CL/functions/CLGEMMTranspose1xW.h b/arm_compute/runtime/CL/functions/CLGEMMTranspose1xW.h
index ae56548c27..13e643a77d 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMTranspose1xW.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMTranspose1xW.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,7 +38,7 @@ class CLGEMMTranspose1xW : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output
*
- * @param[in] input First input tensor. Data type supported: U8/S8/QS8/QASYMM8/QS16/U16/S16/F16/U32/S32/F32
+ * @param[in] input First input tensor. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
index 93925778d7..89e20d20f6 100644
--- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,7 +51,7 @@ public:
/** Set the input and output tensors.
*
* @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/F16/F32 (Written to by the border handler)
+ * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32 (Written to by the border handler)
* @param[out] output Destination tensor. Dimensions, data type and number of channels must match the input ones.
* @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
*/
@@ -59,7 +59,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayer
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/F16/F32
+ * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32
* @param[in] output Destination tensor. Dimensions, data type and number of channels must match the input ones.
* @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
*
diff --git a/arm_compute/runtime/CL/functions/CLPermute.h b/arm_compute/runtime/CL/functions/CLPermute.h
index a41567570a..638207fc48 100644
--- a/arm_compute/runtime/CL/functions/CLPermute.h
+++ b/arm_compute/runtime/CL/functions/CLPermute.h
@@ -39,14 +39,14 @@ class CLPermute : public ICLSimpleFunction
public:
/** Set the input and output tensors.
*
- * @param[in] input The input tensor to permute. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output The output tensor. Data types supported: Same as @p input
* @param[in] perm Permutation vector
*/
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
/** Static function to check if given info will lead to a valid configuration of @ref CLPermute.
*
- * @param[in] input First tensor input info. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32.
+ * @param[in] input First tensor input info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
* @param[in] output Output tensor info. Data types supported: same as @p input.
* @param[in] perm Permutation vector
*
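
A small sketch of calling CLPermute with one of the commonly used permutation vectors; the cubic 4x4x4 shape is an assumption chosen so the output shape is the same for any permutation.

    #include "arm_compute/runtime/CL/CLFunctions.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 4U), 1, DataType::F32));

        CLPermute permute;
        permute.configure(&src, &dst, PermutationVector(2U, 0U, 1U)); // reorder the three lowest dimensions

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src ...
        permute.run();
        CLScheduler::get().sync();
        return 0;
    }
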
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index 75b67cd17c..a59fb4aba8 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -37,13 +37,13 @@ class CLPixelWiseMultiplication : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16).
+     * @param[out]     output          The output tensor. Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
* @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. For QS8 and QS16 scale must be 1.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
* @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
*/
@@ -51,11 +51,11 @@ public:
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
/** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplication
*
- * @param[in] input1 An input tensor info. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32.
* @param[in] input2 An input tensor info. Data types supported: same as @p input1.
- * @param[in] output The output tensor info, Data types supported: same as @p input1. Note: U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16).
+     * @param[in]  output          The output tensor info. Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
* @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. For QS8 and QS16 scale must be 1.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
* @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
*
diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
index a8bdabad98..f7571c1d2d 100644
--- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,14 +43,14 @@ class CLPoolingLayer : public ICLSimpleFunction
public:
/** Set the input and output tensors.
*
- * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*/
void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayer
*
- * @param[in] input Source tensor info. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
* @param[in] output Destination tensor info. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*
diff --git a/arm_compute/runtime/CL/functions/CLReshapeLayer.h b/arm_compute/runtime/CL/functions/CLReshapeLayer.h
index 411e751290..cf5f7e5e15 100644
--- a/arm_compute/runtime/CL/functions/CLReshapeLayer.h
+++ b/arm_compute/runtime/CL/functions/CLReshapeLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,7 +36,7 @@ class CLReshapeLayer : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs and outputs
*
- * @param[in] input First tensor input. Data type supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/F16/F32
+ * @param[in] input First tensor input. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index 8ab6c160d1..34349ed52b 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -53,14 +53,14 @@ public:
CLSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
*/
void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f);
/** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer
*
- * @param[in] input Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
* @param[in] output Destination tensor. Data types supported: same as @p input
*
* @return a status
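
A brief sketch of the beta parameter in use: softmax over a 10-class F32 logit vector. The vector length and the beta value are illustrative.

    #include "arm_compute/runtime/CL/CLFunctions.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor logits, probs;
        logits.allocator()->init(TensorInfo(TensorShape(10U), 1, DataType::F32));
        probs.allocator()->init(TensorInfo(TensorShape(10U), 1, DataType::F32));

        CLSoftmaxLayer softmax;
        softmax.configure(&logits, &probs, 1.0f); // beta = 1.0f matches the documented default

        logits.allocator()->allocate();
        probs.allocator()->allocate();
        // ... fill logits ...
        softmax.run();
        CLScheduler::get().sync();
        return 0;
    }
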
diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h
index 89a2022e76..1e9bb95114 100644
--- a/arm_compute/runtime/CL/functions/CLTranspose.h
+++ b/arm_compute/runtime/CL/functions/CLTranspose.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,13 +40,13 @@ class CLTranspose : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs and output
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLTranspose
*
- * @param[in] input The input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input The input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output The output tensor. Data types supported: Same as @p input
*
* @return a status
diff --git a/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h
index bcda05274b..289191e030 100644
--- a/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h
@@ -50,13 +50,13 @@ public:
CLWidthConcatenateLayer();
/** Initialise the kernel's inputs vector and output.
*
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+     * @param[in,out] inputs_vector The vector containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
* @param[out] output Output tensor. Data types supported: Same as @p input.
*/
void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayer
*
- * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32.
* @param[in] output Output tensor info. Data types supported: Same as @p input.
*
* @return a status
diff --git a/arm_compute/runtime/CPP/functions/CPPPermute.h b/arm_compute/runtime/CPP/functions/CPPPermute.h
index 0094576da6..40d6830425 100644
--- a/arm_compute/runtime/CPP/functions/CPPPermute.h
+++ b/arm_compute/runtime/CPP/functions/CPPPermute.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,14 +38,14 @@ class CPPPermute : public ICPPSimpleFunction
public:
/** Configure the permute CPP kernel
*
- * @param[in] input The input tensor to permute. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output The output tensor. Data types supported: Same as @p input
* @param[in] perm Permutation vector
*/
void configure(const ITensor *input, ITensor *output, const PermutationVector &perm);
/** Static function to check if given info will lead to a valid configuration of @ref CPPPermute
*
- * @param[in] input The input tensor to permute. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output The output tensor. Data types supported: Same as @p input
* @param[in] perm Permutation vector
*
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h
index 45a883948c..421150e18e 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h
@@ -108,7 +108,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
@@ -139,7 +139,7 @@ private:
void configure_mm(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref GCGEMMConvolutionLayer matrix multiply routines
*
- * @param[in] input Input tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data types supported: Same as @p input,
* except for input of QASYMM8 type where output should be of S32 type.
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index 59f5802d2a..a65146d461 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -44,7 +44,7 @@ public:
* @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * of the activation function. Data types supported: QASYMM8/F16/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] activation_info Activation layer parameters.
*/
@@ -52,7 +52,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayer
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * of the activation function. Data types supported: QASYMM8/F16/F32.
* @param[in] output Destination tensor info. Data type supported: same as @p input
* @param[in] act_info Activation layer information.
*
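
The in-place note above can be exercised by passing a nullptr output; a minimal sketch on F32 data, with the tensor length being an arbitrary example value.

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor data;
        data.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));

        NEActivationLayer relu;
        relu.configure(&data, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); // nullptr output -> in-place

        data.allocator()->allocate();
        // ... fill data ...
        relu.run(); // the result overwrites data
        return 0;
    }
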
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index c72d0b6d61..c29646397c 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -37,17 +37,17 @@ class NEArithmeticAddition : public INESimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in] input1 First tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] input2 Second tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[out] output Output tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
+ * @param[in] input1 First tensor input. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 Second tensor input. Data types supported: U8/S16/F16/F32
+ * @param[out] output Output tensor. Data types supported: U8/S16/F16/F32
* @param[in] policy Policy to use to handle overflow.
*/
void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition
*
- * @param[in] input1 First tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] input2 Second tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] output Output tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
+ * @param[in] input1 First tensor input. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 Second tensor input. Data types supported: U8/S16/F16/F32
+ * @param[in] output Output tensor. Data types supported: U8/S16/F16/F32
* @param[in] policy Policy to use to handle overflow.
*
* @return a status
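
A short sketch of the saturating S16 addition path using the configure signature documented above; the tensor length is an arbitrary example value.

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor a, b, sum;
        a.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S16));
        b.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S16));
        sum.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S16));

        NEArithmeticAddition add;
        add.configure(&a, &b, &sum, ConvertPolicy::SATURATE); // clamp instead of wrapping on overflow

        a.allocator()->allocate();
        b.allocator()->allocate();
        sum.allocator()->allocate();
        // ... fill a and b ...
        add.run();
        return 0;
    }
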
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
index 751ed1adf1..9b460c1031 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,17 +37,17 @@ class NEArithmeticSubtraction : public INESimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in] input1 First tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] input2 Second tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[out] output Output tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
+ * @param[in] input1 First tensor input. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 Second tensor input. Data types supported: U8/S16/F16/F32
+ * @param[out] output Output tensor. Data types supported: U8/S16/F16/F32
* @param[in] policy Policy to use to handle overflow.
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction
*
- * @param[in] input1 First tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] input2 Second tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32
- * @param[in] output Output tensor. Data types supported: U8/QS8/QS16/S16/F16/F32
+ * @param[in] input1 First tensor input. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 Second tensor input. Data types supported: U8/S16/F16/F32
+ * @param[in] output Output tensor. Data types supported: U8/S16/F16/F32
* @param[in] policy Policy to use to handle overflow.
*
* @return a status
diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
index feb2087aa0..77f06129a3 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
@@ -50,7 +50,7 @@ public:
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
* 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+ * The rest are optional and used for representing batches. Data types supported: F16/F32.
* @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
* @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
@@ -65,7 +65,7 @@ public:
*
* @param[in] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result.
* 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+ * The rest are optional and used for representing batches. Data types supported: F16/F32.
* @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
* @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h
index 9b05bd4513..42876a8aec 100644
--- a/arm_compute/runtime/NEON/functions/NECol2Im.h
+++ b/arm_compute/runtime/NEON/functions/NECol2Im.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,7 @@ class NECol2Im : public INESimpleFunction
public:
/** Configure the col2im NEON kernel
*
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to convert. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
* while the rest represent batch of outputs. Data types supported: Same as @p input
* @param[in] convolved_dims Output convolved dimensions.
@@ -47,7 +47,7 @@ public:
void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims);
/** Static function to check if given info will lead to a valid configuration of @ref NECol2Im
*
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to convert. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
* while the rest represent batch of outputs. Data types supported: Same as @p input
* @param[in] convolved_dims Output convolved dimensions.
diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
index bdb157f30b..3ec0390124 100644
--- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
@@ -40,7 +40,7 @@ public:
NEConvertFullyConnectedWeights();
/** Initialize the function.
*
- * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/QS32/F16/F32.
+ * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/QS32/F16/F32.
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). Must be in NCHW format.
* @param[in] data_layout The data layout the weights have been trained in.
@@ -48,7 +48,7 @@ public:
void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeights
*
- * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/QS32/F16/F32.
+ * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/QS32/F16/F32.
* @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). Must be in NCHW format.
* @param[in] data_layout The data layout the weights have been trained in.
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index e143814a4e..c4226cbc5d 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -52,7 +52,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
@@ -72,7 +72,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data types supported: QASYMM8/F16/F32.
 * @param[in]  weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
@@ -94,7 +94,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data types supported: QASYMM8/F16/F32.
 * @param[in]  weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h
index 5b63b70634..eefb5fa362 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,7 @@ public:
NEDepthConcatenateLayer();
/** Initialise the kernel's inputs vector and output.
*
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32.
+     * @param[in,out] inputs_vector The vector containing all the tensors to concatenate. Data types supported: F16/F32.
* @param[out] output Output tensor. Data types supported: Same as @p inputs_vector.
*/
void configure(std::vector<ITensor *> inputs_vector, ITensor *output);
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
index b235e87b4a..eedadc242d 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,22 +46,14 @@ public:
/** Initialize the function's source, destination
*
* Valid conversions Input -> Output :
- * QS8 -> QS8, F32
* U8 -> U16, S16, S32
* U16 -> U8, U32
* S16 -> U8, S32
- * QS16 -> QS16, F32
- * F32 -> QS8, QS16
*
- * @warning In case of in-place fixed point position conversion make sure that configure has been called
- * before the updated tensor is used in other functions, as the TensorInfo of the tensor will be
- * altered. In-place is only supported for QS8 -> QS8, QS16 -> QS16.
- *
- * @param[in, out] input The input tensor to convert (Written in case of in-place computation). Data types supported: U8/QS8/U16/S16/F32.
- * @param[out] output The output tensor. Can be null in case of in-place computation. Data types supported: U8/QS8/U16/S16/U32/S32/F32.
+ * @param[in, out] input The input tensor to convert (Written in case of in-place computation). Data types supported: U8/U16/S16/F32.
+ * @param[out] output The output tensor. Can be null in case of in-place computation. Data types supported: U8/U16/S16/U32/S32/F32.
* @param[in] policy Conversion policy.
* @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
- * In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place.
*/
void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
};
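
One of the remaining conversions listed above (U8 -> S16) in a minimal sketch; shift = 0 and the SATURATE policy are example choices, and the tensor length is arbitrary.

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor u8_src, s16_dst;
        u8_src.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::U8));
        s16_dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::S16));

        NEDepthConvertLayer convert;
        convert.configure(&u8_src, &s16_dst, ConvertPolicy::SATURATE, 0); // widen U8 values to S16, no shift

        u8_src.allocator()->allocate();
        s16_dst.allocator()->allocate();
        // ... fill u8_src ...
        convert.run();
        return 0;
    }
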
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index ae384ffa56..a4a55d10f8 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -54,11 +54,11 @@ public:
/** Set the input, weights, biases and output tensors.
*
* @note: DirectConvolution only works in the following configurations:
- * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = QS8/QS16/F16/F32
- * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = QS8/F16/F32
+ * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
+ * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
* 5x5 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F32
*
- * @param[in, out] input Input tensor. Data types supported: QS8/QS16/F16/F32.
+ * @param[in, out] input Input tensor. Data types supported: F16/F32.
* @param[in] weights Set of kernels to convolve the input volume.
* Supported sizes: 1x1, 3x3 and 5x5.
* The 3rd dimension must be the same as the input's volume 3rd dimension.
@@ -73,11 +73,11 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayer
*
* @note: DirectConvolution only works in the following configurations:
- * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = QS8/QS16/F16/F32
- * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = QS8/F16/F32
+ * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
+ * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
* 5x5 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F32
*
- * @param[in] input Input tensor. Data types supported: QS8/QS16/F16/F32.
+ * @param[in] input Input tensor. Data types supported: F16/F32.
* @param[in] weights Set of kernels to convolve the input volume.
* Supported sizes: 1x1, 3x3 and 5x5.
* The 3rd dimension must be the same as the input's volume 3rd dimension.
@@ -104,7 +104,6 @@ private:
NEActivationLayer _activationlayer_function;
Tensor _accumulator;
bool _has_bias;
- bool _is_fixed_point;
bool _is_activationlayer_enabled;
unsigned int _dim_split;
};
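
One of the configurations the note above still allows (3x3 F32 kernels, stride 1) in a rough sketch; the feature-map sizes, the zero padding and the 16 output channels are illustrative assumptions.

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, weights, bias, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::F32)); // 3x3 kernels, 3 -> 16 channels
        bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(6U, 6U, 16U), 1, DataType::F32)); // (8 - 3)/1 + 1 = 6

        NEDirectConvolutionLayer conv;
        conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 0, 0)); // stride 1, no padding

        src.allocator()->allocate();
        weights.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src/weights/bias ...
        conv.run();
        return 0;
    }
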
diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h
index b6b7e77471..27a9eea9af 100644
--- a/arm_compute/runtime/NEON/functions/NEFillBorder.h
+++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@ public:
*
* @note This function fills the borders within the XY-planes.
*
- * @param[in, out] input Source tensor. Data type supported: U8/QS8/S16/S32/F32
+ * @param[in, out] input Source tensor. Data type supported: U8/S16/S32/F32
* @param[in] border_width Width of the tensor border in pixels.
* @param[in] border_mode Strategy to use for borders.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
index e9c8e27d57..2c259fa178 100644
--- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@ class NEFlattenLayer : public INESimpleFunction
public:
/** Initialise the kernel's input and output.
*
- * @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: QS8/QS16/F16/F32
+ * @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: F16/F32
* @param[out] output Output tensor with shape [w*h*d, input_batches] where:
* w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
*/
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 42c9e2d3e9..d4166b3830 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -51,7 +51,7 @@ public:
NEFullyConnectedLayerReshapeWeights(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input and output tensors.
*
- * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/QS16/F32.
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: F32.
* @param[out] output Destination tensor. Data type supported: Same as @p input.
* @param[in] transpose_weights True if the weights must be transposed. Data types supported: Same as @p weights.
* @param[in] is_batched_fc_layer True if it is a batched fully connected layer
@@ -59,7 +59,7 @@ public:
void configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer);
/** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayerReshapeWeights
*
- * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: QS8/QS16/F32.
+ * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: F32.
* @param[in] output Destination tensor info. Data type supported: Same as @p input.
* @param[in] transpose_weights True if the weights must be transposed. Data types supported: Same as @p weights.
* @param[in] is_batched_fc_layer True if it is a batched fully connected layer
@@ -104,7 +104,7 @@ public:
NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = default;
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data type supported: QS8/QS16/F16/F32.
+ * @param[in] input Source tensor. Data type supported: F16/F32.
* @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input.
 * @param[in]  biases  Bias tensor. Can be nullptr. Data type supported: Same as @p input.
* @param[out] output Destination tensor. Data type supported: Same as @p input.
@@ -114,7 +114,7 @@ public:
void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights = true, bool are_weights_reshaped = false);
/** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayer
*
- * @param[in] input Source tensor info. Data type supported: QS8/QS16/F16/F32.
+ * @param[in] input Source tensor info. Data type supported: F16/F32.
* @param[in] weights Weights tensor info. The weights must be 2 dimensional. Data type supported: Same as @p input
 * @param[in]  biases  Bias tensor info. It can be nullptr. Data type supported: Same as @p input.
* @param[in] output Destination tensor info. Data type supported: Same as @p input.
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 5d108b2c14..cf059e5c4d 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -66,7 +66,7 @@ public:
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
* @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
*
- * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: QS8/QS16/F16/F32
+ * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: F16/F32
* @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
* @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
* @param[out] d Output tensor. Data type supported: same as @p a
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 7075becf75..68e1145e35 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -60,7 +60,7 @@ public:
NEConvolutionLayerReshapeWeights(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input and output tensors.
*
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QS8/QASYMM8/QS16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[out] output Destination tensor. Data types supported: Same as @p weights.
* @param[in] transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise.
@@ -69,7 +69,7 @@ public:
void configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayerReshapeWeights
*
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[in] output Destination tensor. Data types supported: Same as @p weights.
* @param[in] transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise.
@@ -116,7 +116,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QS8/QASYMM8/QS16/F32.
+ * Data types supported: QASYMM8/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
@@ -134,7 +134,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * Data types supported: QASYMM8/F16/F32.
 * @param[in]  weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
@@ -158,7 +158,7 @@ public:
private:
/** Configures the appropriate matrix multiply routine
*
- * @param[in] input Input tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. Data type supported: Same as @p input.
* @param[out] output Output tensor. Data types supported: Same as @p input,
* except for input of QASYMM8 type where output should be of S32 type.
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
index b911fd064f..4a6bec03e6 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ class NEGEMMInterleave4x4 : public INESimpleFunction
public:
/** Initialise the kernel's inputs, output
*
- * @param[in] input First input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+ * @param[in] input First input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input
*/
void configure(const ITensor *input, ITensor *output);
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
index 8b9ad136b4..3f8e731d01 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
@@ -38,13 +38,13 @@ class NEGEMMTranspose1xW : public INESimpleFunction
public:
/** Initialise the kernel's inputs, output
*
- * @param[in] input First input tensor. Data type supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32/
+     * @param[in]  input  First input tensor. Data type supported: U8/S8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xW
*
- * @param[in] input First input tensor. Data type supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32/
+     * @param[in]  input  First input tensor. Data type supported: U8/S8/U16/S16/F16/U32/S32/F32
* @param[in] output Output tensor. Data type supported: same as @p input
*
* @return a status
diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h
index caa8a011f6..d888b7e8f5 100644
--- a/arm_compute/runtime/NEON/functions/NEIm2Col.h
+++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h
@@ -43,7 +43,7 @@ public:
/** Configure the im2col NEON kernel
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/QASYMM8/F16/F32
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
* Note: QASYMM8 works only for has_bias = false
* @param[out] output The output tensor. Data types supported: Same as @p input
* @param[in] kernel_dims The kernel dimensions (width and height).
@@ -56,7 +56,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEIm2Col
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/QASYMM8/F16/F32
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
* Note: QASYMM8 works only for has_bias = false
* @param[in] output The output tensor. Data types supported: Same as @p input
* @param[in] kernel_dims The kernel dimensions (width and height).
diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
index 4b5ad28706..4f1f32fba5 100644
--- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,7 +55,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data type supported: QS8/QS16/F16/F32
+ * and an optional 4th dimension for batch of inputs. Data type supported: F16/F32
* @param[out] output Destination with the same dimensions, data type and number of channels of @p input
* @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
*/
@@ -63,7 +63,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NENormalizationLayer
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data type supported: QS8/QS16/F16/F32
+ * and an optional 4th dimension for batch of inputs. Data type supported: F16/F32
* @param[in] output Destination with the same dimensions, data type and number of channels of @p input
* @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
*
diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h
index 58626cd2f2..580d24e415 100644
--- a/arm_compute/runtime/NEON/functions/NEPermute.h
+++ b/arm_compute/runtime/NEON/functions/NEPermute.h
@@ -40,7 +40,7 @@ public:
*
* @note Supported permutation vectors : [2, 0, 1], [1, 2, 0]
*
- * @param[in] input The input tensor to permute. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output The output tensor. Data types supported: Same as @p input
* @param[in] perm Permutation vector
*/
@@ -49,7 +49,7 @@ public:
*
* @note Supported permutation vectors : [2, 0, 1], [1, 2, 0]
*
- * @param[in] input The input tensor to permute. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output The output tensor. Data types supported: Same as @p input
* @param[in] perm Permutation vector
*
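
For context, the NEPermute interface documented above is driven entirely through configure(). The following is an illustrative sketch only and not part of the patch: the includes, tensor shape and the choice of the [2, 0, 1] permutation vector are assumptions; output auto-initialisation is assumed to follow the library's usual behaviour.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src{}, dst{};
        // 3 lower dimensions [width, height, channels]; F32 is one of the still-supported types
        src.allocator()->init(TensorInfo(TensorShape(64U, 32U, 3U), 1, DataType::F32));

        NEPermute permute;
        permute.configure(&src, &dst, PermutationVector(2U, 0U, 1U)); // one of the two supported vectors

        src.allocator()->allocate();
        dst.allocator()->allocate();
        permute.run();
        return 0;
    }
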
diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
index ba96ae6cfa..371bb2e13e 100644
--- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -37,22 +37,22 @@ class NEPixelWiseMultiplication : public INESimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 An input tensor. Data types supported: U8/QS8/S16/F16/F32.
+ * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8/QS8/S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/S16/F16/F32.
* @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. For QS8 and QS16 scale must be 1.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy.
* @param[in] rounding_policy Rounding policy.
*/
void configure(ITensor *input1, ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
/** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplication
*
- * @param[in] input1 First tensor info input. Data types supported: U8/QS8/S16/F16/F32.
- * @param[in] input2 Second tensor info input. Data types supported: U8/QS8/S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8/QS8/S16/F16/F32.
+ * @param[in] input1 First tensor info input. Data types supported: U8/S16/F16/F32.
+ * @param[in] input2 Second tensor info input. Data types supported: U8/S16/F16/F32.
+ * @param[in] output Output tensor info. Data types supported: U8/S16/F16/F32.
* @param[in] scale Scale to apply after multiplication. Must be positive.
* @param[in] overflow_policy Overflow policy.
* @param[in] rounding_policy Rounding policy.
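
A minimal usage sketch of the configure() call documented above, assuming two pre-allocated S16 tensors (names and the exact scale/policy choices are illustrative); 1/2^8 satisfies the "1/255 or 1/2^n" constraint:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    // Element-wise multiply of two S16 tensors, scaled by 1/2^8,
    // saturating on overflow and rounding towards zero.
    void multiply_s16(Tensor &input1, Tensor &input2, Tensor &output)
    {
        NEPixelWiseMultiplication mul;
        mul.configure(&input1, &input2, &output, 1.0f / 256.0f,
                      ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
        mul.run();
    }
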
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
index 4224f75c77..26858d5cde 100644
--- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -46,18 +46,18 @@ public:
NEPoolingLayer();
/** Set the input and output tensors.
*
- * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only
+ * @note F16 is supported for pool sizes 2 and 3 only
*
- * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*/
void configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info);
/** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayer
*
- * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only
+ * @note F16 is supported for pool sizes 2 and 3 only
*
- * @param[in] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/QASYMM8/QS16/F16/F32.
+ * @param[in] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/F16/F32.
* @param[in] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*
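
To make the pool-size note concrete, here is a hedged sketch of a 3x3 max-pooling configuration on an F16 tensor (tensor set-up is omitted and the stride/padding values are assumptions):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    // Pool size 3 is one of the two sizes the note above permits for F16.
    void max_pool_3x3(Tensor &src, Tensor &dst)
    {
        NEPoolingLayer pool;
        pool.configure(&src, &dst,
                       PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)));
        pool.run();
    }
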
diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
index 0bab534ebc..a77a5f30dc 100644
--- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
@@ -37,7 +37,7 @@ class NEReshapeLayer : public INESimpleFunction
public:
/** Initialise the kernel's inputs and outputs
*
- * @param[in] input First tensor input. Data type supported: U8/S8/QS8/QASYMM8//U16/S16/QS16/U32/S32/F16/F32
+ * @param[in] input First tensor input. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
void configure(const ITensor *input, ITensor *output);
diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
index 3d981b6f75..61f46004d6 100644
--- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
@@ -51,18 +51,18 @@ public:
NESoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input and output tensors.
*
- * @param[in,out] input Source tensor. Data types supported: QASYMM8/QS8/QS16/F16/F32. If the width is not a
+ * @param[in,out] input Source tensor. Data types supported: QASYMM8/F16/F32. If the width is not a
* multiple of the internal processing block size, @ref NEFillBorderKernel replicates the
* last value of each row to the nearest multiple.
* @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] beta (Optional) A scaling factor for the exponent. QS8/QS16 only support a beta value of 1.
+ * @param[in] beta (Optional) A scaling factor for the exponent.
*/
void configure(ITensor *input, ITensor *output, float beta = 1.0f);
/** Static function to check if given info will lead to a valid configuration of @ref NESoftmaxLayer
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/QS8/QS16/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
* @param[in] output Destination tensor. Data types supported: same as @p input
- * @param[in] beta (Optional) A scaling factor for the exponent. QS8/QS16 only support a beta value of 1.
+ * @param[in] beta (Optional) A scaling factor for the exponent.
*
* @return a status
*/
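
The validate()/configure() pair above follows the library-wide pattern of checking tensor infos before committing to a configuration. A sketch, with the beta value and error handling chosen purely for illustration:

    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    // Run softmax with an explicit exponent scaling factor; the QS8/QS16
    // "beta must be 1" note removed by this patch no longer applies.
    bool run_softmax(Tensor &src, Tensor &dst, float beta)
    {
        if(NESoftmaxLayer::validate(src.info(), dst.info(), beta).error_code() != ErrorCode::OK)
        {
            return false;
        }
        NESoftmaxLayer softmax;
        softmax.configure(&src, &dst, beta);
        softmax.run();
        return true;
    }
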
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
index 6d1e107084..0234288b4b 100644
--- a/arm_compute/runtime/NEON/functions/NETranspose.h
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,13 +41,13 @@ class NETranspose : public INESimpleFunction
public:
/** Initialise the kernel's inputs and output
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NETranspose
*
- * @param[in] input The input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input The input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] output The output tensor. Data types supported: Same as @p input
*
* @return a status
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index df06aff647..07f8bd7bcd 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -38,8 +38,6 @@ std::string get_cl_type_from_data_type(const DataType &dt)
{
case DataType::U8:
return "uchar";
- case DataType::QS8:
- return "qs8";
case DataType::S8:
return "char";
case DataType::QASYMM8:
@@ -48,8 +46,6 @@ std::string get_cl_type_from_data_type(const DataType &dt)
return "ushort";
case DataType::S16:
return "short";
- case DataType::QS16:
- return "qs16";
case DataType::U32:
return "uint";
case DataType::S32:
@@ -75,13 +71,11 @@ std::string get_data_size_from_data_type(const DataType &dt)
switch(dt)
{
case DataType::U8:
- case DataType::QS8:
case DataType::S8:
case DataType::QASYMM8:
return "8";
case DataType::U16:
case DataType::S16:
- case DataType::QS16:
case DataType::F16:
return "16";
case DataType::U32:
@@ -101,10 +95,6 @@ std::string get_underlying_cl_type_from_data_type(const DataType &dt)
{
switch(dt)
{
- case DataType::QS8:
- return "char";
- case DataType::QS16:
- return "short";
case DataType::QS32:
return "int";
default:
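
With the qs8/qs16 cases dropped, get_cl_type_from_data_type() still resolves every remaining data type to its OpenCL C spelling. A hedged example of how a caller might splice the result into a kernel build option (the define name here is illustrative, not the library's own):

    #include <string>
    #include "arm_compute/core/CL/CLHelpers.h"
    #include "arm_compute/core/Types.h"

    // e.g. DataType::U8 -> "-DDATA_TYPE=uchar", DataType::S16 -> "-DDATA_TYPE=short"
    std::string data_type_define(arm_compute::DataType dt)
    {
        return "-DDATA_TYPE=" + arm_compute::get_cl_type_from_data_type(dt);
    }
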
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index db4b344935..42cf21350d 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -231,22 +231,16 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "gemm_interleave4x4", "gemm.cl" },
{ "gemm_ma_f16", "gemm.cl" },
{ "gemm_ma_f32", "gemm.cl" },
- { "gemm_ma_qs8", "gemm.cl" },
- { "gemm_ma_qs16", "gemm.cl" },
{ "gemm_mv", "gemv.cl" },
{ "gemm_mv_quantized", "gemv.cl" },
{ "gemm_mm_interleaved_transposed_f16", "gemm.cl" },
{ "gemm_mm_interleaved_transposed_f16_bifrost", "gemm.cl" },
{ "gemm_mm_interleaved_transposed_f32", "gemm.cl" },
{ "gemm_mm_interleaved_transposed_f32_bifrost", "gemm.cl" },
- { "gemm_mm_interleaved_transposed_qs8", "gemm.cl" },
- { "gemm_mm_interleaved_transposed_qs16", "gemm.cl" },
{ "gemm_mm_floating_point", "gemm.cl" },
{ "gemm_mm_floating_point_f16_bifrost", "gemm.cl" },
{ "gemm_mm_floating_point_f32_bifrost", "gemm.cl" },
{ "gemm_mm_floating_point_f32_bifrost_1000", "gemm.cl" },
- { "gemm_mm_qs8", "gemm.cl" },
- { "gemm_mm_qs16", "gemm.cl" },
{ "gemm_lc_vm_f32", "gemm.cl" },
{ "gemm_transpose1xW", "gemm.cl" },
{ "gemmlowp_matrix_a_reduction", "gemmlowp.cl" },
@@ -557,10 +551,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/fill_border.clembed"
},
{
- "fixed_point.h",
-#include "./cl_kernels/fixed_point.hembed"
- },
- {
"floor.cl",
#include "./cl_kernels/floor.clembed"
},
diff --git a/src/core/CL/cl_kernels/activation_layer.cl b/src/core/CL/cl_kernels/activation_layer.cl
index a8ea7387d6..373406a6da 100644
--- a/src/core/CL/cl_kernels/activation_layer.cl
+++ b/src/core/CL/cl_kernels/activation_layer.cl
@@ -25,23 +25,6 @@
#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
-#if defined(FIXED_POINT_POSITION)
-#include "fixed_point.h"
-
-#define CONST_ONE (1 << FIXED_POINT_POSITION)
-#define ABS_OP(a) ABS_SAT_OP_EXPAND((a), DATA_TYPE, VEC_SIZE)
-#define ADD_OP(a, b) ADD_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE)
-#define SUB_OP(a, b) SUB_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE)
-#define MUL_OP(a, b) MUL_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-#define MLA_OP(a, b, c) MLA_SAT_OP_EXPAND((a), (b), (c), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-#define DIV_OP(a, b) DIV_SAT_OP_VEC_EXPAND((a), (b), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-#define EXP_OP(a) EXP_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-#define LOG_OP(a) LOG_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-#define SQRT_OP(a) DIV_OP(CONST_ONE, INVSQRT_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION))
-#define TANH_OP(a) TANH_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-
-#else /* FIXED_POINT_POSITION */
-
#define CONST_ONE 1.f
#define ABS_OP(a) fabs((a))
#define ADD_OP(a, b) ((a) + (b))
@@ -54,8 +37,6 @@
#define SQRT_OP(a) sqrt((a))
#define TANH_OP(a) tanh((a))
-#endif /* FIXED_POINT_POSITION */
-
// Logistic Activation
inline TYPE logistic_op(TYPE x)
{
@@ -125,9 +106,8 @@ inline TYPE linear_op(TYPE x)
* @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
* @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH
* @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
- * @note In case of fixed point calculations the fixed point position is passed using -DFIXED_POINT_POSITION=position. e.g. -DFIXED_POINT_POSITION=3.
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32
+ * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
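
The notes above list the preprocessor arguments the kernel now expects; one way the host side might assemble them, shown as a plain string purely for illustration (the library's own build-option plumbing is not reproduced here), with -DFIXED_POINT_POSITION gone:

    #include <string>

    std::string activation_layer_build_options()
    {
        std::string opts;
        opts += "-DDATA_TYPE=float ";        // element type consumed by VEC_DATA_TYPE above
        opts += "-DVEC_SIZE=16 ";            // vector width, per the first note
        opts += "-DACT=TANH ";               // activation function, per the second note
        opts += "-DA_VAL=1.0f -DB_VAL=1.0f"; // extra parameters required by some activations
        return opts;
    }
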
diff --git a/src/core/CL/cl_kernels/arithmetic_op.cl b/src/core/CL/cl_kernels/arithmetic_op.cl
index 8bd28230b7..9efb71b199 100644
--- a/src/core/CL/cl_kernels/arithmetic_op.cl
+++ b/src/core/CL/cl_kernels/arithmetic_op.cl
@@ -23,10 +23,6 @@
*/
#include "helpers.h"
-#if defined(FIXED_POINT_POSITION)
-#include "fixed_point.h"
-#endif /* FIXED_POINT_POSITION */
-
#ifdef SATURATE
#define ADD(x, y) add_sat((x), (y))
#define SUB(x, y) sub_sat((x), (y))
@@ -43,7 +39,7 @@
* e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=short
* @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used.
*
- * @param[in] in1_ptr Pointer to the source tensor. Supported data types: U8/QS8/QS16/S16/F16/F32
+ * @param[in] in1_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32
* @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -51,7 +47,7 @@
* @param[in] in1_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] in1_step_z in1_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] in2_ptr Pointer to the source tensor. Supported data types: U8/QS8 (only if @p in1_ptr is QS8), QS16 (only if @p in1_ptr is QS16), S16/F16/F32
+ * @param[in] in2_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32
* @param[in] in2_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in2_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -59,7 +55,7 @@
* @param[in] in2_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] in2_step_z in2_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] out_ptr Pointer to the destination tensor. Supported data types: U8 (only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16/F32
+ * @param[out] out_ptr Pointer to the destination tensor. Supported data types: U8 (only if both inputs are U8), S16/F16/F32
* @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes)
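
A scalar model of what the -DSATURATE switch above toggles for an S16 addition, written in C++ purely for illustration:

    #include <algorithm>
    #include <cstdint>

    // With -DSATURATE the kernel uses add_sat(); without it the sum wraps.
    int16_t add_s16_saturate(int16_t x, int16_t y)
    {
        const int32_t sum = int32_t(x) + int32_t(y);
        return int16_t(std::min<int32_t>(std::max<int32_t>(sum, INT16_MIN), INT16_MAX));
    }

    int16_t add_s16_wrap(int16_t x, int16_t y)
    {
        return int16_t(uint16_t(x) + uint16_t(y)); // two's-complement wrap-around
    }
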
diff --git a/src/core/CL/cl_kernels/batchnormalization_layer.cl b/src/core/CL/cl_kernels/batchnormalization_layer.cl
index 9c980da62a..5352af3c5a 100644
--- a/src/core/CL/cl_kernels/batchnormalization_layer.cl
+++ b/src/core/CL/cl_kernels/batchnormalization_layer.cl
@@ -25,25 +25,12 @@
#if defined(VEC_SIZE) && defined(DATA_TYPE)
-#if defined(FIXED_POINT_POSITION)
-#include "fixed_point.h"
-
-#define ADD_OP(a, b) ADD_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE)
-#define SUB_OP(a, b) SUB_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE)
-#define MUL_OP(a, b) MUL_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-#define INVSQRT_OP(a) INVSQRT_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-#define SQCVT_SAT(a) SQCVT_SAT_OP_EXPAND((a), DATA_TYPE, FIXED_POINT_POSITION)
-
-#else /* FIXED_POINT_POSITION */
-
#define ADD_OP(a, b) ((a) + (b))
#define SUB_OP(a, b) ((a) - (b))
#define MUL_OP(a, b) ((a) * (b))
#define INVSQRT_OP(a) rsqrt((a))
#define SQCVT_SAT(a) (a)
-#endif /* FIXED_POINT_POSITION */
-
#if defined(FUSED_ACTIVATION)
#include "activation_layer.cl"
#define ACTIVATION_FUNC(x) ACTIVATION_OP(FUSED_ACTIVATION, x)
@@ -53,7 +40,7 @@
/** Apply batch normalization.
*
- * @param[in] input_ptr Pointer to the first source tensor. Supported data types: QS8/QS16/F16/F32
+ * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes)
@@ -163,7 +150,7 @@ __kernel void batchnormalization_layer_nchw(TENSOR3D_DECLARATION(input),
/** Apply batch normalization on tensors with NHWC format.
*
- * @param[in] input_ptr Pointer to the first source tensor. Supported data types: QS8/QS16/F16/F32
+ * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes)
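
With the fixed-point branch removed, the ADD_OP/SUB_OP/MUL_OP/INVSQRT_OP macros reduce to plain floating-point arithmetic. A scalar model of the per-element computation they express (parameter names follow the usual batch-normalisation convention and are assumptions here):

    #include <cmath>

    // x_hat = (x - mean) / sqrt(var + epsilon); result = gamma * x_hat + beta
    float batch_normalize(float x, float mean, float var, float epsilon,
                          float gamma, float beta)
    {
        const float x_hat = (x - mean) * (1.0f / std::sqrt(var + epsilon)); // SUB_OP, INVSQRT_OP, MUL_OP
        return gamma * x_hat + beta;                                        // MUL_OP, ADD_OP
    }
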
diff --git a/src/core/CL/cl_kernels/channel_shuffle.cl b/src/core/CL/cl_kernels/channel_shuffle.cl
index 26cee9ccdd..23962e1c2e 100644
--- a/src/core/CL/cl_kernels/channel_shuffle.cl
+++ b/src/core/CL/cl_kernels/channel_shuffle.cl
@@ -38,7 +38,7 @@
* @note The number of channels in each group should be given as a preprocessor argument using -DK=num. e.g. -DK=1
* K is equal to num_channels / num_groups.
*
- * @param[in] src_ptr Pointer to the source matrix. Supported data types: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source matrix. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/col2im.cl b/src/core/CL/cl_kernels/col2im.cl
index 6e491f33cf..98bf8d1ed4 100644
--- a/src/core/CL/cl_kernels/col2im.cl
+++ b/src/core/CL/cl_kernels/col2im.cl
@@ -23,12 +23,7 @@
*/
#include "helpers.h"
-#if defined(FIXED_POINT_POSITION)
-#include "fixed_point.h"
-#endif // FIXED_POINT_POSITION
-
#if defined(DATA_TYPE) && defined(WIDTH_OUTPUT) && defined(ELEMENT_SIZE) && defined(WIDTH_INPUT)
-#if !defined(FIXED_POINT_POSITION)
#if ELEMENT_SIZE == 1
#define COND_DATA_TYPE char
@@ -100,41 +95,4 @@ __kernel void col2im(
*((__global DATA_TYPE *)(output_ptr + idx + x_clamped.s6 * dst_stride_z)) = data.s6;
*((__global DATA_TYPE *)(output_ptr + idx + x_clamped.s7 * dst_stride_z)) = data.s7;
}
-#else // !defined(FIXED_POINT_POSITION)
-/** This kernel performs a reshaping of the output of the convolution layer.
- *
- * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=qs8
- * @note The width of the output tensor must be passed at compile time using -DWIDTH_OUTPUT: e.g. -DWIDTH_OUTPUT=320
- *
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QS16
- * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
- * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes)
- */
-__kernel void col2im(
- IMAGE_DECLARATION(src),
- TENSOR3D_DECLARATION(dst),
- uint dst_stride_w)
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(dst);
-
- // Compute output offset
- int idx = get_global_id(0) * dst.stride_z + (get_global_id(1) / WIDTH_OUTPUT) * dst_stride_y + (get_global_id(1) % WIDTH_OUTPUT) * dst_stride_x + get_global_id(2) * dst_stride_w;
-
- // Store value
- *((__global DATA_TYPE *)(dst.ptr + idx)) = *((__global DATA_TYPE *)(src.ptr));
-}
-#endif // !defined(FIXED_POINT_POSITION)
#endif // defined(DATA_TYPE) && defined(WIDTH_OUTPUT) && defined(ELEMENT_SIZE) && defined(WIDTH_INPUT) \ No newline at end of file
diff --git a/src/core/CL/cl_kernels/concatenate.cl b/src/core/CL/cl_kernels/concatenate.cl
index f97ae13a9a..6ec8383c52 100644
--- a/src/core/CL/cl_kernels/concatenate.cl
+++ b/src/core/CL/cl_kernels/concatenate.cl
@@ -25,7 +25,7 @@
/** This kernel concatenates the input tensor into the output tensor along the first dimension
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8, QASYMM8, QS16, F16, F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8, F16, F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -60,7 +60,7 @@ __kernel void concatenate_width(
/** This kernel concatenates the input tensor into the output tensor along the third dimension
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8, QS16, F16, F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/convert_fc_weights.cl b/src/core/CL/cl_kernels/convert_fc_weights.cl
index 3c3e8b0dc4..5aadfb36f9 100644
--- a/src/core/CL/cl_kernels/convert_fc_weights.cl
+++ b/src/core/CL/cl_kernels/convert_fc_weights.cl
@@ -32,7 +32,7 @@
* @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
* @attention Original input tensor width*height and depth should be given as a preprocessor argument using -DFACTOR_1=size and -DFACTOR_2=size for NCHW and vice versa for NHWC. e.g. -DFACTOR_1=256 and -DFACTOR_2=128
*
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8, S8, QS8, QASYMM8, U16, S16, QS16, U32, S32, QS32, F16, F32
+ * @param[in] src_ptr Pointer to the source image. Supported data types: U8, S8, QASYMM8, U16, S16, U32, S32, QS32, F16, F32
* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/convolution_layer.cl b/src/core/CL/cl_kernels/convolution_layer.cl
index 6a70b009c8..2b83e5adf1 100644
--- a/src/core/CL/cl_kernels/convolution_layer.cl
+++ b/src/core/CL/cl_kernels/convolution_layer.cl
@@ -23,10 +23,6 @@
*/
#include "helpers.h"
-#if defined(FIXED_POINT_POSITION)
-#include "fixed_point.h"
-#endif // FIXED_POINT_POSITION
-
#if defined(DATA_TYPE)
/** This kernel reshapes the tensor's low three dimensions to single column
*
diff --git a/src/core/CL/cl_kernels/depth_convert.cl b/src/core/CL/cl_kernels/depth_convert.cl
index a9b7284c83..01491ec1b7 100644
--- a/src/core/CL/cl_kernels/depth_convert.cl
+++ b/src/core/CL/cl_kernels/depth_convert.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,23 +23,6 @@
*/
#include "helpers.h"
-#if defined(FIXED_POINT_POSITION)
-
-#include "fixed_point.h"
-
-#ifdef SATURATE
-#define CONVERT_DOWN(x, in_type, out_type, fixed_point_position) CONVERT_DOWN1_SAT(x, in_type, out_type, fixed_point_position)
-#define CONVERT_DOWN1_SAT(x, in_type, out_type, fixed_point_position) convert_##out_type##_##in_type##_sat(x, fixed_point_position)
-#else /* SATURATE */
-#define CONVERT_DOWN(x, in_type, out_type, fixed_point_position) CONVERT_DOWN1(x, in_type, out_type, fixed_point_position)
-#define CONVERT_DOWN1(x, in_type, out_type, fixed_point_position) convert_##out_type##_##in_type(x, fixed_point_position)
-#endif /* SATURATE */
-
-#define CONVERT_UP(x, in_type, out_type, fixed_point_position) CONVERT_UP1(x, in_type, out_type, fixed_point_position)
-#define CONVERT_UP1(x, in_type, out_type, fixed_point_position) convert_##out_type##_##in_type(x, fixed_point_position)
-
-#else /* FIXED_POINT_POSITION */
-
#ifdef SATURATE
#define CONVERT_DOWN(x, type) CONVERT_SAT(x, type)
#else /* SATURATE */
@@ -48,22 +31,18 @@
#define CONVERT_UP(x, type) CONVERT(x, type)
-#endif /* FIXED_POINT_POSITION */
-
/** This function performs a down-scaling depth conversion.
*
* @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT:
* e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short
*
- * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3
- *
* @param[in] in_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32, S32, F16, F32
* @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] in_step_x in_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)
* @param[in] in_step_y in_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] in_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: QS8, U8, QS16, U16, S16, U32, S32
+ * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, U16, S16, U32, S32
* @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
* @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
@@ -84,11 +63,7 @@ __kernel void convert_depth_down(
VEC_DATA_TYPE(DATA_TYPE_IN, 16)
in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr);
-#if defined(FIXED_POINT_POSITION)
- vstore16(CONVERT_DOWN(in_data, VEC_DATA_TYPE(DATA_TYPE_IN, 16), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), FIXED_POINT_POSITION), 0, (__global DATA_TYPE_OUT *)out.ptr);
-#else /* FIXED_POINT_POSITION */
vstore16(CONVERT_DOWN(in_data >> shift, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
-#endif /* FIXED_POINT_POSITION */
}
/** This function performs an up-scaling depth conversion.
@@ -96,9 +71,7 @@ __kernel void convert_depth_down(
* @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT:
* e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short
*
- * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3
- *
- * @param[in] in_ptr Pointer to the source image. Supported data types: U8, QS8, U16, S16, QS16, U32 or S32
+ * @param[in] in_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32 or S32
* @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] in_step_x in_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)
@@ -125,9 +98,5 @@ __kernel void convert_depth_up(
VEC_DATA_TYPE(DATA_TYPE_IN, 16)
in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr);
-#if defined(FIXED_POINT_POSITION)
- vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_IN, 16), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), FIXED_POINT_POSITION), 0, (__global DATA_TYPE_OUT *)out.ptr);
-#else /* FIXED_POINT_POSITION */
vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) << shift, 0, (__global DATA_TYPE_OUT *)out.ptr);
-#endif /* FIXED_POINT_POSITION */
}
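
A scalar C++ model of the two kernels above once the fixed-point paths are gone, using the documented S16-to-U8 (down) and U8-to-S16 (up) cases; the helper names are illustrative:

    #include <algorithm>
    #include <cstdint>

    // Down-conversion: arithmetic shift right, then narrow (saturating with -DSATURATE).
    uint8_t convert_depth_down_s16_to_u8(int16_t in, int shift, bool saturate)
    {
        const int16_t shifted = int16_t(in >> shift);
        if(saturate)
        {
            return uint8_t(std::min<int16_t>(std::max<int16_t>(shifted, int16_t(0)), int16_t(255)));
        }
        return uint8_t(shifted); // wrapping conversion, as without -DSATURATE
    }

    // Up-conversion: widen first, then shift left.
    int16_t convert_depth_up_u8_to_s16(uint8_t in, int shift)
    {
        return int16_t(int16_t(in) << shift);
    }
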
diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl
index f3aa0d6dd8..9a8b57e4c4 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution.cl
@@ -527,7 +527,7 @@ __kernel void depthwise_weights_reshape(
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
* @note The convolution information must be passed at compile time using -DSTRIDE_X, -DSTRIDE_Y, -DPAD_LEFT, -DPAD_TOP, -DPAD_RIGHT, -DPAD_BOTTOM, -DKERNEL_WIDHT, -DKERNEL_HEIGHT, -DSRC_WIDTH, -DSRC_HEIGHT, -DDEPTH_MULTIPLIER
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -587,7 +587,7 @@ __kernel void depthwise_im2col(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(d
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
* @note The convolution information must be passed at compile time using -DCONV_WIDTH, -DCONV_HEIGHT, e.g -DCONV_WIDTH=32, -DCONV_HEIGHT=42
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
diff --git a/src/core/CL/cl_kernels/dequantization_layer.cl b/src/core/CL/cl_kernels/dequantization_layer.cl
index 21e9c873ac..4908bb0b31 100644
--- a/src/core/CL/cl_kernels/dequantization_layer.cl
+++ b/src/core/CL/cl_kernels/dequantization_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
/** This performs the dequantization of 8-bit unsigned integers to floating point.
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32
+ * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/direct_convolution1x1.cl b/src/core/CL/cl_kernels/direct_convolution1x1.cl
index 817c261ba2..7a308c99e2 100644
--- a/src/core/CL/cl_kernels/direct_convolution1x1.cl
+++ b/src/core/CL/cl_kernels/direct_convolution1x1.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,24 +23,12 @@
*/
#include "helpers.h"
-#if defined(FIXED_POINT_POSITION)
-#include "fixed_point.h"
-
-#define ADD_OP(a, b) ADD_SAT_OP_EXPAND((a), (b), DATA_TYPE_PROMOTED, 8)
-#define MUL_OP(a, b) MUL_SAT_OP_EXPAND(CONVERT((a), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)), CONVERT((b), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)), DATA_TYPE_PROMOTED, 8, FIXED_POINT_POSITION)
-
-// There is no need to have a larger intermediate type for qs32 because all the arguments are already promoted
-MULQ_SAT_IMPL(qs32x8, qs32x8)
-
-#else /* FIXED_POINT_POSITION */
#undef CONVERT_SAT
#define ADD_OP(a, b) ((a) + (b))
#define MUL_OP(a, b) ((a) * (b))
#define CONVERT_SAT(a, b) ((a))
-#endif /* FIXED_POINT_POSITION */
-
#if defined(DATA_TYPE) && defined(DATA_SIZE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH)
#if STRIDE_X == 3
diff --git a/src/core/CL/cl_kernels/direct_convolution3x3.cl b/src/core/CL/cl_kernels/direct_convolution3x3.cl
index a7abc9ff1d..824306f2ba 100644
--- a/src/core/CL/cl_kernels/direct_convolution3x3.cl
+++ b/src/core/CL/cl_kernels/direct_convolution3x3.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,25 +23,12 @@
*/
#include "helpers.h"
-#if defined(FIXED_POINT_POSITION)
-#include "fixed_point.h"
-
-#define ADD_OP(a, b) ADD_SAT_OP_EXPAND((a), (b), DATA_TYPE_PROMOTED, 8)
-#define MUL_OP(a, b) MUL_SAT_OP_EXPAND(CONVERT((a), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)), CONVERT((b), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)), DATA_TYPE_PROMOTED, 8, FIXED_POINT_POSITION)
-
-// There is no need to have a larger intermediate type for qs32 because all the arguments are already promoted
-MULQ_SAT_IMPL(qs32x8, qs32x8)
-
-#else /* FIXED_POINT_POSITION */
-
#undef CONVERT_SAT
#define ADD_OP(a, b) ((a) + (b))
#define MUL_OP(a, b) ((a) * (b))
#define CONVERT_SAT(a, b) ((a))
-#endif /* FIXED_POINT_POSITION */
-
#if defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH)
#if STRIDE_X == 1
@@ -86,7 +73,7 @@ MULQ_SAT_IMPL(qs32x8, qs32x8)
* @note The third dimensions of the weights tensors must be passed at compile time using -DWEIGHTS_DEPTH
* @note If biases are used then -DHAS_BIAS has to be passed at compile time
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/fill_border.cl b/src/core/CL/cl_kernels/fill_border.cl
index 33a9495d66..9d6a2b8b5a 100644
--- a/src/core/CL/cl_kernels/fill_border.cl
+++ b/src/core/CL/cl_kernels/fill_border.cl
@@ -23,10 +23,6 @@
*/
#include "helpers.h"
-#if defined(FIXED_POINT_POSITION)
-#include "fixed_point.h"
-#endif /* FIXED_POINT_POSITION */
-
/** Fill N pixel of the padding edge of a single channel image by replicating the closest valid pixel.
*
* @attention The DATA_TYPE needs to be passed at the compile time.
diff --git a/src/core/CL/cl_kernels/fixed_point.h b/src/core/CL/cl_kernels/fixed_point.h
deleted file mode 100644
index 46fa645c2b..0000000000
--- a/src/core/CL/cl_kernels/fixed_point.h
+++ /dev/null
@@ -1,518 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_FIXED_POINT_H
-#define ARM_COMPUTE_FIXED_POINT_H
-
-#define TYPE_ALIAS(type, alias) \
- typedef type alias; \
- typedef type alias##x##1; \
- typedef type##2 alias##x##2; \
- typedef type##3 alias##x##3; \
- typedef type##4 alias##x##4; \
- typedef type##8 alias##x##8; \
- typedef type##16 alias##x##16;
-
-TYPE_ALIAS(char, qs8)
-TYPE_ALIAS(short, qs16)
-TYPE_ALIAS(int, qs32)
-
-#define qs8_MIN ((char)CHAR_MIN)
-#define qs8_MAX ((char)CHAR_MAX)
-#define qs16_MIN ((short)SHRT_MIN)
-#define qs16_MAX ((short)SHRT_MAX)
-#define qs32_MIN ((int)INT_MIN)
-#define qs32_MAX ((int)INT_MAX)
-
-#define qu8_MIN ((uchar)0)
-#define qu8_MAX ((uchar)UCHAR_MAX)
-#define qu16_MIN ((ushort)0)
-#define qu16_MAX ((ushort)USHRT_MAX)
-#define qu32_MIN ((uint)0)
-#define qu32_MAX ((uint)UINT_MAX)
-
-#define qs8_TYPE char
-#define qs8x1_TYPE char
-#define qs8x2_TYPE char2
-#define qs8x3_TYPE char3
-#define qs8x4_TYPE char4
-#define qs8x8_TYPE char8
-#define qs8x16_TYPE char16
-
-#define qs16_TYPE short
-#define qs16x1_TYPE short
-#define qs16x2_TYPE short2
-#define qs16x3_TYPE short3
-#define qs16x4_TYPE short4
-#define qs16x8_TYPE short8
-#define qs16x16_TYPE short16
-
-#define qs32_TYPE int
-#define qs32x1_TYPE int
-#define qs32x2_TYPE int2
-#define qs32x3_TYPE int3
-#define qs32x4_TYPE int4
-#define qs32x8_TYPE int8
-#define qs32x16_TYPE int16
-
-/* All internal constants are represented in the maximum supported fixed point format (QS16),
- * thus we define an additional shift parameter required to convert the constant
- * from the maximum supported format to the require one.
- */
-#define qs8_SHIFT 8
-#define qs16_SHIFT 0
-
-#undef VEC_DATA_TYPE_STR
-#undef VEC_DATA_TYPE
-#undef CONVERT_STR
-#undef CONVERT
-#undef CONVERT_SAT_STR
-#undef CONVERT_SAT
-
-#define VEC_DATA_TYPE_STR(type, size) type##x##size
-#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
-
-#define CONVERT_STR3(x, type, rtype) (convert_##rtype((x)))
-#define CONVERT_STR2(x, type, rtype) CONVERT_STR3(x, type, rtype)
-#define CONVERT_STR(x, type) CONVERT_STR2(x, type, type##_TYPE)
-#define CONVERT(x, type) CONVERT_STR(x, type)
-
-#define CONVERT_SAT_STR3(x, type, rtype) (convert_##rtype##_sat((x)))
-#define CONVERT_SAT_STR2(x, type, rtype) CONVERT_SAT_STR3(x, type, rtype)
-#define CONVERT_SAT_STR(x, type) CONVERT_SAT_STR2(x, type, type##_TYPE)
-#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
-
-/** Computes saturating absolute value of fixed point vector.
- *
- * @param[in] type the actual data type.
- *
- * @return The result of the fixed point absolute value.
- */
-#define ABSQ_SAT_IMPL(type) \
- inline type abs_##type##_sat(type VopA) \
- { \
- return CONVERT_SAT(abs(VopA), type); \
- }
-
-ABSQ_SAT_IMPL(qs8x16)
-ABSQ_SAT_IMPL(qs16x8)
-
-#define ABS_SAT_OP_EXPAND_STR(a, type, size) abs_##type##x##size##_sat((a))
-#define ABS_SAT_OP_EXPAND(a, type, size) ABS_SAT_OP_EXPAND_STR(a, type, size)
-
-/** Computes max of fixed point types.
- *
- * @param[in] type the actual data type.
- *
- * @return The result of the fixed point maximum.
- */
-#define MAXQ_IMPL(type) \
- inline type max_##type(type VopA, type VopB) \
- { \
- return max(VopA, VopB); \
- }
-
-MAXQ_IMPL(qs8x1)
-MAXQ_IMPL(qs8x2)
-MAXQ_IMPL(qs8x4)
-MAXQ_IMPL(qs8x8)
-MAXQ_IMPL(qs8x16)
-MAXQ_IMPL(qs16x1)
-MAXQ_IMPL(qs16x2)
-MAXQ_IMPL(qs16x4)
-MAXQ_IMPL(qs16x8)
-MAXQ_IMPL(qs16x16)
-
-#define MAX_OP_EXPAND_STR(a, b, type, size) max_##type##x##size((a), (b))
-#define MAX_OP_EXPAND(a, b, type, size) MAX_OP_EXPAND_STR(a, b, type, size)
-
-/** Computes saturated addition of fixed point types.
- *
- * @param[in] type the actual data type.
- *
- * @return The result of the fixed point addition. The result is saturated in case of overflow
- */
-#define ADDQ_SAT_IMPL(type) \
- inline type add_sat_##type(type VopA, type VopB) \
- { \
- return add_sat(VopA, VopB); \
- }
-
-ADDQ_SAT_IMPL(qs8x1)
-ADDQ_SAT_IMPL(qs8x2)
-ADDQ_SAT_IMPL(qs8x4)
-ADDQ_SAT_IMPL(qs8x8)
-ADDQ_SAT_IMPL(qs8x16)
-ADDQ_SAT_IMPL(qs16x1)
-ADDQ_SAT_IMPL(qs16x2)
-ADDQ_SAT_IMPL(qs16x4)
-ADDQ_SAT_IMPL(qs16x8)
-ADDQ_SAT_IMPL(qs16x16)
-ADDQ_SAT_IMPL(qs32x1)
-ADDQ_SAT_IMPL(qs32x2)
-ADDQ_SAT_IMPL(qs32x4)
-ADDQ_SAT_IMPL(qs32x8)
-ADDQ_SAT_IMPL(qs32x16)
-
-#define ADD_SAT_OP_EXPAND_STR(a, b, type, size) add_sat_##type##x##size((a), (b))
-#define ADD_SAT_OP_EXPAND(a, b, type, size) ADD_SAT_OP_EXPAND_STR(a, b, type, size)
-
-/** Computes saturated subtraction of fixed point types.
- *
- * @param[in] type the actual data type.
- *
- * @return The result of the fixed point subtraction. The result is saturated in case of overflow
- */
-#define SUBQ_SAT_IMPL(type) \
- inline type sub_sat_##type(type VopA, type VopB) \
- { \
- return sub_sat(VopA, VopB); \
- }
-
-SUBQ_SAT_IMPL(qs8x1)
-SUBQ_SAT_IMPL(qs8x2)
-SUBQ_SAT_IMPL(qs8x4)
-SUBQ_SAT_IMPL(qs8x8)
-SUBQ_SAT_IMPL(qs8x16)
-SUBQ_SAT_IMPL(qs16x1)
-SUBQ_SAT_IMPL(qs16x2)
-SUBQ_SAT_IMPL(qs16x4)
-SUBQ_SAT_IMPL(qs16x8)
-SUBQ_SAT_IMPL(qs16x16)
-
-#define SUB_SAT_OP_EXPAND_STR(a, b, type, size) sub_sat_##type##x##size((a), (b))
-#define SUB_SAT_OP_EXPAND(a, b, type, size) SUB_SAT_OP_EXPAND_STR(a, b, type, size)
-
-/* Multiply of two fixed point numbers
- *
- * @param[in] type the actual data type.
- * @param[in] itype the intermediate data type.
- *
- * @return The result of the fixed point multiplication.
- */
-#define MULQ_IMPL(type, itype) \
- inline type mul_##type(type VopA, type VopB, int fixed_point_position) \
- { \
- itype round_val = (itype)(1 << (fixed_point_position - 1)); \
- itype res = CONVERT((VopA), itype) * CONVERT((VopB), itype) + round_val; \
- return CONVERT((res >> (itype)fixed_point_position), type); \
- }
-
-MULQ_IMPL(qs8x8, qs16x8)
-MULQ_IMPL(qs16x8, qs32x8)
-MULQ_IMPL(qs8x16, qs16x16)
-MULQ_IMPL(qs16x16, qs32x16)
-
-#define MUL_OP_EXPAND_STR(a, b, type, size, position) mul_##type##x##size((a), (b), (position))
-#define MUL_OP_EXPAND(a, b, type, size, position) MUL_OP_EXPAND_STR(a, b, type, size, position)
-
-/* Saturate multiply of two fixed point numbers
- *
- * @param[in] type the actual data type.
- * @param[in] itype the intermediate data type.
- *
- * @return The result of the fixed point multiplication. The result is saturated in case of overflow
- */
-#define MULQ_SAT_IMPL(type, itype) \
- inline type mul_sat_##type(type VopA, type VopB, int fixed_point_position) \
- { \
- itype round_val = (itype)(1 << (fixed_point_position - 1)); \
- itype res = mad_sat(CONVERT((VopA), itype), CONVERT((VopB), itype), round_val); \
- return CONVERT_SAT((res >> (itype)fixed_point_position), type); \
- }
-
-MULQ_SAT_IMPL(qs8x1, qs16x1)
-MULQ_SAT_IMPL(qs8x2, qs16x2)
-MULQ_SAT_IMPL(qs8x3, qs16x3)
-MULQ_SAT_IMPL(qs8x4, qs16x4)
-MULQ_SAT_IMPL(qs8x8, qs16x8)
-MULQ_SAT_IMPL(qs8x16, qs16x16)
-MULQ_SAT_IMPL(qs16x1, qs32x1)
-MULQ_SAT_IMPL(qs16x2, qs32x2)
-MULQ_SAT_IMPL(qs16x3, qs32x3)
-MULQ_SAT_IMPL(qs16x4, qs32x4)
-MULQ_SAT_IMPL(qs16x8, qs32x8)
-MULQ_SAT_IMPL(qs16x16, qs32x16)
-
-#define MUL_SAT_OP_EXPAND_STR(a, b, type, size, position) mul_sat_##type##x##size((a), (b), (position))
-#define MUL_SAT_OP_EXPAND(a, b, type, size, position) MUL_SAT_OP_EXPAND_STR(a, b, type, size, position)
-
-/** Saturate multiply-accumulate
- *
- * @param[in] type the actual data type.
- * @param[in] itype the intermediate data type.
- *
- * @return The result of the fixed point multiply-accumulate. The result is saturated in case of overflow
- */
-#define MLAQ_SAT_IMPL(type, itype) \
- type mla_sat_##type(type VopA, type VopB, type VopC, int fixed_point_position) \
- { \
- itype res = mad_sat(CONVERT(VopB, itype), CONVERT(VopC, itype), (itype)(1 << (fixed_point_position - 1))); \
- return add_sat(VopA, CONVERT_SAT(res >> (itype)fixed_point_position, type)); \
- }
-
-MLAQ_SAT_IMPL(qs8x8, qs16x8)
-MLAQ_SAT_IMPL(qs8x16, qs16x16)
-MLAQ_SAT_IMPL(qs16x8, qs32x8)
-
-#define MLA_SAT_OP_EXPAND_STR(a, b, c, type, size, position) mla_sat_##type##x##size((a), (b), (c), (position))
-#define MLA_SAT_OP_EXPAND(a, b, c, type, size, position) MLA_SAT_OP_EXPAND_STR(a, b, c, type, size, position)
-
-/** Saturate multiply-accumulate long
- *
- * @param[in] type the actual data type.
- * @param[in] itype the intermediate data type.
- *
- * @return The result of the fixed point multiply-accumulate long. The result is saturated in case of overflow
- */
-#define MLALQ_SAT_IMPL(type, itype) \
- itype mlal_sat_##type(itype VopA, type VopB, type VopC, int fixed_point_position) \
- { \
- itype res = mad_sat(CONVERT(VopB, itype), CONVERT(VopC, itype), (itype)(1 << (fixed_point_position - 1))); \
- return add_sat(VopA, res >> (itype)fixed_point_position); \
- }
-
-MLALQ_SAT_IMPL(qs8x8, qs16x8)
-MLALQ_SAT_IMPL(qs16x8, qs32x8)
-
-#define MLAL_SAT_OP_EXPAND_STR(a, b, c, type, size, position) mlal_sat_##type##x##size((a), (b), (c), (position))
-#define MLAL_SAT_OP_EXPAND(a, b, c, type, size, position) MLAL_SAT_OP_EXPAND_STR(a, b, c, type, size, position)
-
-/** Saturate division of two fixed point vectors
- *
- * @param[in] stype the actual scalar data type.
- * @param[in] type the actual data type.
- * @param[in] itype the intermediate data type.
- *
- * @return The result of the fixed point division. The result is saturated in case of overflow
- */
-#define DIVQ_SAT_IMPL(stype, type, itype) \
- inline type div_sat_##type(type VopA, type VopB, int fixed_point_position) \
- { \
- itype conv_a = CONVERT((VopA), itype); \
- itype denominator = CONVERT((VopB), itype); \
- itype numerator = conv_a << (itype)(fixed_point_position); \
- itype res = select((itype)(numerator / denominator), select((itype)stype##_MAX, (itype)stype##_MIN, (itype)(conv_a < (itype)0)), (itype)(denominator == (itype)0)); \
- return CONVERT_SAT((res), type); \
- }
-
-DIVQ_SAT_IMPL(qs8, qs8x16, qs16x16)
-DIVQ_SAT_IMPL(qs16, qs16x8, qs32x8)
-DIVQ_SAT_IMPL(qs16, qs16x16, qs32x16)
-DIVQ_SAT_IMPL(qs8, qs8, qs16)
-DIVQ_SAT_IMPL(qs16, qs16, qs32)
-
-#define DIV_SAT_OP_EXPAND_STR(a, b, type, position) div_sat_##type((a), (b), (position))
-#define DIV_SAT_OP_EXPAND(a, b, type, position) DIV_SAT_OP_EXPAND_STR(a, b, type, position)
-
-#define DIV_SAT_OP_VEC_EXPAND_STR(a, b, type, size, position) div_sat_##type##x##size((a), (b), (position))
-#define DIV_SAT_OP_VEC_EXPAND(a, b, type, size, position) DIV_SAT_OP_VEC_EXPAND_STR(a, b, type, size, position)
-
-/** Saturate exponential of a fixed point vector
- *
- * @note Implemented approach uses taylor polynomial to approximate the exponential function.
- *
- * @param[in] stype the actual scalar data type.
- * @param[in] type the actual data type.
- * @param[in] size the number of the calculated elements.
- *
- * @return The result of the fixed point exponential. The result is saturated in case of overflow
- */
-#define EXPQ_IMPL(stype, type, size) \
- inline type exp_sat_##type(type VopA, int fixed_point_position) \
- { \
- type const_one = (type)(1 << (fixed_point_position)); \
- type ln2 = (type)((((0x58B9 >> (14 - fixed_point_position))) + 1) >> 1); \
- type inv_ln2 = (type)((((0x38AA >> (14 - fixed_point_position)) + 1) >> 1)) | const_one; \
- type A = (type)(((0x7FBA >> (14 - fixed_point_position)) + 1) >> 1); \
- type B = (type)(((0x3FE9 >> (14 - fixed_point_position)) + 1) >> 1); \
- type C = (type)(((0x1693 >> (14 - fixed_point_position)) + 1) >> 1); \
- type D = (type)(((0x0592 >> (14 - fixed_point_position)) + 1) >> 1); \
- type m = MUL_SAT_OP_EXPAND(VopA, inv_ln2, stype, size, fixed_point_position); \
- type dec_m = m >> (type)fixed_point_position; \
- type alpha = MUL_SAT_OP_EXPAND(dec_m << (type)fixed_point_position, ln2, stype, size, fixed_point_position); \
- alpha = CONVERT(abs_diff(VopA, alpha), type); \
- type sum = add_sat(MUL_SAT_OP_EXPAND(alpha, D, stype, size, fixed_point_position), C); \
- sum = add_sat(MUL_SAT_OP_EXPAND(alpha, sum, stype, size, fixed_point_position), B); \
- sum = add_sat(MUL_SAT_OP_EXPAND(alpha, sum, stype, size, fixed_point_position), A); \
- sum = add_sat(MUL_SAT_OP_EXPAND(alpha, sum, stype, size, fixed_point_position), const_one); \
- return select((type)stype##_MAX, select(sum << dec_m, sum >> -dec_m, dec_m < (type)0), clz(sum) > dec_m); /* Saturate result if needed */ \
- }
-
-EXPQ_IMPL(qs8, qs8x2, 2)
-EXPQ_IMPL(qs8, qs8x4, 4)
-EXPQ_IMPL(qs8, qs8x8, 8)
-EXPQ_IMPL(qs8, qs8x16, 16)
-EXPQ_IMPL(qs16, qs16x2, 2)
-EXPQ_IMPL(qs16, qs16x4, 4)
-EXPQ_IMPL(qs16, qs16x8, 8)
-EXPQ_IMPL(qs16, qs16x16, 16)
-
-#define EXP_OP_EXPAND_STR(a, type, size, position) exp_sat_##type##x##size((a), (position))
-#define EXP_OP_EXPAND(a, type, size, position) EXP_OP_EXPAND_STR(a, type, size, position)
-
-/** Saturate logarithm of a fixed point vector
- *
- * @note Implemented approach uses taylor polynomial to approximate the logarithm function.
- *
- * @param[in] stype the actual scalar data type.
- * @param[in] type the actual data type.
- * @param[in] size the number of the calculated elements.
- *
- * @return The result of the fixed point logarithm. The result is saturated in case of overflow
- */
-#define LOGQ_IMPL(stype, type, size) \
- inline type log_sat_##type(type VopA, int fixed_point_position) \
- { \
- type const_one = (type)(1 << (fixed_point_position)); \
- type ln2 = (type)(0x58B9 >> (15 - fixed_point_position)); /* 1.4384189 */ \
- type A = (type)(0x5C0F >> (14 - fixed_point_position)); /* 1.4384189 */ \
- type B = -(type)(0x56AE >> (15 - fixed_point_position)); /* -0.6771900 */ \
- type C = (type)(0x2933 >> (15 - fixed_point_position)); /* 0.3218538 */ \
- type D = -(type)(0x0AA7 >> (15 - fixed_point_position)); /* -0.0832229 */ \
- type inter_a = select(VopA, DIV_SAT_OP_VEC_EXPAND(const_one, VopA, stype, size, fixed_point_position), VopA < const_one); \
- type shift_val = (type)(15 - stype##_SHIFT) - clz(inter_a >> (type)fixed_point_position); \
- inter_a = inter_a >> shift_val; \
- inter_a = sub_sat(inter_a, const_one); \
- type sum = add_sat(MUL_SAT_OP_EXPAND(inter_a, D, stype, size, fixed_point_position), C); \
- sum = add_sat(MUL_SAT_OP_EXPAND(inter_a, sum, stype, size, fixed_point_position), B); \
- sum = add_sat(MUL_SAT_OP_EXPAND(inter_a, sum, stype, size, fixed_point_position), A); \
- sum = MUL_SAT_OP_EXPAND(inter_a, sum, stype, size, fixed_point_position); \
- sum = MUL_SAT_OP_EXPAND(add_sat(sum, shift_val << (type)fixed_point_position), ln2, stype, size, fixed_point_position); \
- return select(select(sum, -sum, VopA < const_one), (type)0, VopA < (type)0); /* Saturate result if needed */ \
- }
-
-LOGQ_IMPL(qs8, qs8x16, 16)
-LOGQ_IMPL(qs16, qs16x8, 8)
-LOGQ_IMPL(qs16, qs16x16, 16)
-
-#define LOG_OP_EXPAND_STR(a, type, size, position) log_sat_##type##x##size((a), (position))
-#define LOG_OP_EXPAND(a, type, size, position) LOG_OP_EXPAND_STR(a, type, size, position)
-
-/** Saturate inverse square root of a fixed point vector
- *
- * @note Implemented approach uses Newton's method to approximate the inverse square root function.
- *
- * @param[in] stype the actual scalar data type.
- * @param[in] type the actual data type.
- * @param[in] size the number of the calculated elements.
- *
- * @return The result of the fixed point inverse square root. The result is saturated in case of overflow
- */
-#define INVSQRTQ_IMPL(stype, type, size) \
- inline type invsqrt_sat_##type(type VopA, int fixed_point_position) \
- { \
- type const_three = (type)(3 << (fixed_point_position)); \
- type shift_value = (type)(16 - stype##_SHIFT) - (clz(VopA) + (type)fixed_point_position); \
- type temp = select((type)(VopA >> shift_value), select((type)stype##_MAX, (type)(VopA << (-shift_value)), (type)(clz(VopA) > (-shift_value))), (type)(shift_value < (type)0)); \
- type x = temp; \
- x = MUL_SAT_OP_EXPAND(x, sub_sat(const_three, MUL_SAT_OP_EXPAND(MUL_SAT_OP_EXPAND(x, x, stype, size, fixed_point_position), temp, stype, size, fixed_point_position)), stype, size, fixed_point_position) >> 1; \
- x = MUL_SAT_OP_EXPAND(x, sub_sat(const_three, MUL_SAT_OP_EXPAND(MUL_SAT_OP_EXPAND(x, x, stype, size, fixed_point_position), temp, stype, size, fixed_point_position)), stype, size, fixed_point_position) >> 1; \
- x = MUL_SAT_OP_EXPAND(x, sub_sat(const_three, MUL_SAT_OP_EXPAND(MUL_SAT_OP_EXPAND(x, x, stype, size, fixed_point_position), temp, stype, size, fixed_point_position)), stype, size, fixed_point_position) >> 1; \
- if(sizeof((stype)(1)) > 1) /* Perform more iterations if datatype is QS16 */ \
- { \
- x = MUL_SAT_OP_EXPAND(x, sub_sat(const_three, MUL_SAT_OP_EXPAND(MUL_SAT_OP_EXPAND(x, x, stype, size, fixed_point_position), temp, stype, size, fixed_point_position)), stype, size, fixed_point_position) >> 1; \
- x = MUL_SAT_OP_EXPAND(x, sub_sat(const_three, MUL_SAT_OP_EXPAND(MUL_SAT_OP_EXPAND(x, x, stype, size, fixed_point_position), temp, stype, size, fixed_point_position)), stype, size, fixed_point_position) >> 1; \
- } \
- type shift_value2 = select(shift_value >> 1, (-shift_value) >> 1, shift_value < (type)0); \
- return select((type)(x >> shift_value2), select((type)stype##_MAX, (type)(x << shift_value2), (type)(clz(x) > shift_value2)), (type)(shift_value < (type)0)); /* Saturate result if needed */ \
- }
-
-INVSQRTQ_IMPL(qs8, qs8x1, 1)
-INVSQRTQ_IMPL(qs16, qs16x1, 1)
-INVSQRTQ_IMPL(qs8, qs8x16, 16)
-INVSQRTQ_IMPL(qs16, qs16x8, 8)
-
-#define INVSQRT_OP_EXPAND_STR(a, type, size, position) invsqrt_sat_##type##x##size((a), (position))
-#define INVSQRT_OP_EXPAND(a, type, size, position) INVSQRT_OP_EXPAND_STR(a, type, size, position)
-
-/** Saturate hyperbolic tangent of a fixed point vector
- *
- * tanh(x) = (e^2x - 1)/(e^2x + 1)
- *
- * @param[in] stype the actual scalar data type.
- * @param[in] type the actual data type.
- * @param[in] size the number of the calculated elements.
- *
- * @return The result of the fixed point hyperbolic tangent. The result is saturated in case of overflow
- */
-#define TANHQ_IMPL(stype, type, size) \
- inline type tanh_sat_##type(type VopA, int fixed_point_position) \
- { \
- type const_one = (type)(1 << (fixed_point_position)); \
- type const_two = (type)(2 << (fixed_point_position)); \
- type exp2x = EXP_OP_EXPAND(MUL_SAT_OP_EXPAND(const_two, VopA, stype, size, fixed_point_position), stype, size, fixed_point_position); \
- type num = SUB_SAT_OP_EXPAND(exp2x, const_one, stype, size); \
- type den = ADD_SAT_OP_EXPAND(exp2x, const_one, stype, size); \
- return DIV_SAT_OP_VEC_EXPAND(num, den, stype, size, fixed_point_position); \
- }
-
-TANHQ_IMPL(qs8, qs8x16, 16)
-TANHQ_IMPL(qs16, qs16x8, 8)
-
-#define TANH_OP_EXPAND_STR(a, type, size, position) tanh_sat_##type##x##size((a), (position))
-#define TANH_OP_EXPAND(a, type, size, position) TANH_OP_EXPAND_STR(a, type, size, position)
-
-#define floatx16 float16
-#define float16_TYPE float16
-
-#define CONVERTQ_DOWN_IMPL(in_type, out_type) \
- inline out_type convert_##out_type##_##in_type(in_type a, int fixed_point_position) \
- { \
- return CONVERT(a * (1 << fixed_point_position) + select((in_type)-0.5f, (in_type)0.5f, isgreater(a, (in_type)0)), out_type); \
- }
-
-CONVERTQ_DOWN_IMPL(float16, qs8x16)
-CONVERTQ_DOWN_IMPL(float16, qs16x16)
-
-#define CONVERTQ_DOWN_SAT_IMPL(in_type, out_type) \
- inline out_type convert_##out_type##_##in_type##_sat(in_type a, int fixed_point_position) \
- { \
- return CONVERT_SAT(a * (1 << fixed_point_position) + select((in_type)-0.5f, (in_type)0.5f, isgreater(a, (in_type)0)), out_type); \
- }
-
-CONVERTQ_DOWN_SAT_IMPL(float16, qs8x16)
-CONVERTQ_DOWN_SAT_IMPL(float16, qs16x16)
-
-#define CONVERTQ_UP_IMPL(in_type, out_type) \
- inline out_type convert_##out_type##_##in_type(in_type a, int fixed_point_position) \
- { \
- return CONVERT(a, out_type) / (1 << fixed_point_position); \
- }
-
-CONVERTQ_UP_IMPL(qs8x16, float16)
-CONVERTQ_UP_IMPL(qs16x16, float16)
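
Going back from fixed point to float, the removed CONVERTQ_UP_IMPL helpers just divide the raw value by 2^fixed_point_position. A scalar equivalent for a single QS8 lane:

#include <stdint.h>

/* Scalar form of CONVERTQ_UP_IMPL: real value = raw / 2^fixed_point_position. */
float qs8_to_float(int8_t raw, int fixed_point_position)
{
    return (float)raw / (float)(1 << fixed_point_position);
}
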
-
-#define SQCVT_SAT_IMPL(type) \
- inline type sqcvt_##type##_sat(float a, int fixed_point_position) \
- { \
- return CONVERT_SAT((a * (1 << fixed_point_position) + ((a < 0) ? -0.5f : 0.5f)), type); \
- }
-
-SQCVT_SAT_IMPL(qs8)
-SQCVT_SAT_IMPL(qs16)
-
-#define SQCVT_SAT_OP_EXPAND_STR(a, type, position) sqcvt_##type##_sat((a), (position))
-#define SQCVT_SAT_OP_EXPAND(a, type, position) SQCVT_SAT_OP_EXPAND_STR((a), type, position)
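
The scalar float-to-fixed conversion removed with SQCVT_SAT_IMPL scales by 2^fixed_point_position, rounds half away from zero and saturates to the target range. A plain C restatement for QS8:

#include <stdint.h>

/* Scale, round half away from zero, then clamp to the signed 8-bit range. */
int8_t float_to_qs8_sat(float a, int fixed_point_position)
{
    float scaled = a * (float)(1 << fixed_point_position) + (a < 0.0f ? -0.5f : 0.5f);
    if(scaled > 127.0f)  return INT8_MAX;
    if(scaled < -128.0f) return INT8_MIN;
    return (int8_t)scaled;
}
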
-
-#endif // ARM_COMPUTE_FIXED_POINT_H
diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl
index e969e847d7..f75161ca0a 100644
--- a/src/core/CL/cl_kernels/gemm.cl
+++ b/src/core/CL/cl_kernels/gemm.cl
@@ -23,10 +23,6 @@
*/
#include "helpers.h"
-#ifdef FIXED_POINT_POSITION
-#include "fixed_point.h"
-#endif // FIXED_POINT_POSITION
-
#if defined(TRANSPOSE_W) && defined(MULT_TRANSPOSE1XW_WIDTH)
#if ELEMENT_SIZE == 1
@@ -44,7 +40,7 @@
* @note The transposition width must be passed at compile time using -DTRANSPOSE_W (i.e. -DTRANSPOSE_W)
* @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (i.e. -DMULT_TRANSPOSE1XW_WIDTH=2)
*
- * @param[in] src_ptr Pointer to the source matrix. Supported data types: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source matrix. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes)
@@ -93,7 +89,7 @@ __kernel void gemm_transpose1xW(TENSOR3D_DECLARATION(src),
* @note The data type must be passed at compile time using -DDATA_TYPE (i.e. -DDATA_TYPE=float)
* @note The multiplication factor for the height of the 4x4 interleaved block must be passed at compile time using -DMULT_INTERLEAVE4X4_HEIGHT (i.e. -DMULT_INTERLEAVE4X4_HEIGHT=2)
*
- * @param[in] src_ptr Pointer to the source matrix. Supported data types: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source matrix. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes)
@@ -1085,248 +1081,6 @@ __kernel void gemm_mm_interleaved_transposed_f16_bifrost(IMAGE_DECLARATION(src0)
#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
-#if defined(FIXED_POINT_POSITION)
-/** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1) in 8 bit fixed point precision
- * Matrix A and matrix B must be reshaped respectively with @ref gemm_interleave4x4_8bit and @ref gemm_transpose1x16 before running the matrix multiplication
- *
- * @note The number of columns of matrix B and the optional alpha's value need to be passed at compile time using -DCOLS_B and -DALPHA
- * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (i.e. -DMULT_TRANSPOSE1XW_WIDTH=2)
- * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at compile time using -DMULT_INTERLEAVE4X4_HEIGHT (i.e. -DMULT_INTERLEAVE4X4_HEIGHT=2)
- * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid out-of-bounds reads, the number of channels of matrix B must be passed at compile time using MATRIX_B_DEPTH (i.e. -DMATRIX_B_DEPTH=16)
- * This case can happen when GEMM is used to perform the element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have multiple inputs (i.e. a = [K, M, 16, Batches], b = [N, K, 16])
- * @note: ALPHA must be passed in 8 bit fixed point format
- *
- * @param[in] src0_ptr Pointer to the source matrix. Supported data types: QS8
- * @param[in] src0_stride_x Stride of the source matrix in X dimension (in bytes)
- * @param[in] src0_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in bytes)
- * @param[in] src0_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source matrix
- * @param[in] src1_ptr Pointer to the source matrix. Supported data types: same as @p src0_ptr
- * @param[in] src1_stride_x Stride of the source matrix in X dimension (in bytes)
- * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in bytes)
- * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source matrix
- * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src0_ptr
- * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix
- * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in bytes)
- * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in bytes)
- * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- */
-__kernel void gemm_mm_interleaved_transposed_qs8(IMAGE_DECLARATION(src0),
- IMAGE_DECLARATION(src1),
- IMAGE_DECLARATION(dst),
- uint src0_stride_z,
- uint src1_stride_z,
- uint dst_stride_z)
-{
- int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
- int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
- int z = get_global_id(2);
-
- // Offset
- const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
- const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 16;
-
- // src_addr_a = address of matrix A
- // src_addr_b = address of matrix B
- int src0_addr_in_bytes = z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
- int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
-
-#if defined(MATRIX_B_DEPTH)
- // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
- src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
-#else // defined(MATRIX_B_DEPTH)
- src1_addr_in_bytes += z * src1_stride_z;
-#endif // defined(MATRIX_B_DEPTH)
-
- __global char *src_addr_a = (__global char *)(src0_ptr + src0_addr_in_bytes);
- __global char *src_addr_b = (__global char *)(src1_ptr + src1_addr_in_bytes);
-
- // Compute end row address for matrix B
- __global char *src_end_addr_b = src_addr_b + COLS_B;
-
- src_addr_a += offset_row_a;
- src_addr_b += offset_row_b;
-
- // Reset accumulators
- short8 c00 = 0.0f;
- short8 c10 = 0.0f;
- short8 c20 = 0.0f;
- short8 c30 = 0.0f;
- short8 c01 = 0.0f;
- short8 c11 = 0.0f;
- short8 c21 = 0.0f;
- short8 c31 = 0.0f;
-
- // This for loop performs 1 accumulation for each iteration
- for(; src_addr_b < src_end_addr_b; src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 16 * MULT_TRANSPOSE1XW_WIDTH)
- {
- // Load values from matrix A (interleaved) and matrix B (transposed)
- char4 a0 = vload4(0, src_addr_a);
- char16 b0 = vload16(0, src_addr_b);
-
- c00 = mlal_sat_qs8x8(c00, (char8)a0.s0, b0.s01234567, FIXED_POINT_POSITION);
- c10 = mlal_sat_qs8x8(c10, (char8)a0.s1, b0.s01234567, FIXED_POINT_POSITION);
- c20 = mlal_sat_qs8x8(c20, (char8)a0.s2, b0.s01234567, FIXED_POINT_POSITION);
- c30 = mlal_sat_qs8x8(c30, (char8)a0.s3, b0.s01234567, FIXED_POINT_POSITION);
-
- c01 = mlal_sat_qs8x8(c01, (char8)a0.s0, b0.s89ABCDEF, FIXED_POINT_POSITION);
- c11 = mlal_sat_qs8x8(c11, (char8)a0.s1, b0.s89ABCDEF, FIXED_POINT_POSITION);
- c21 = mlal_sat_qs8x8(c21, (char8)a0.s2, b0.s89ABCDEF, FIXED_POINT_POSITION);
- c31 = mlal_sat_qs8x8(c31, (char8)a0.s3, b0.s89ABCDEF, FIXED_POINT_POSITION);
- }
-
- // Compute destination address
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Multiply by the weight of matrix product
- char16 c00_qs8 = convert_char16_sat((short16)(c00, c01));
- char16 c10_qs8 = convert_char16_sat((short16)(c10, c11));
- char16 c20_qs8 = convert_char16_sat((short16)(c20, c21));
- char16 c30_qs8 = convert_char16_sat((short16)(c30, c31));
-
-#if defined(ALPHA)
- c00_qs8 = mul_sat_qs8x16(c00_qs8, (char16)ALPHA, FIXED_POINT_POSITION);
- c10_qs8 = mul_sat_qs8x16(c10_qs8, (char16)ALPHA, FIXED_POINT_POSITION);
- c20_qs8 = mul_sat_qs8x16(c20_qs8, (char16)ALPHA, FIXED_POINT_POSITION);
- c30_qs8 = mul_sat_qs8x16(c30_qs8, (char16)ALPHA, FIXED_POINT_POSITION);
-#endif // defined(ALPHA)
-
- // Compute dst address
- __global uchar *dst_addr = offset(&dst, 0, 0);
-
- // Add offset for batched GEMM
- dst_addr += z * dst_stride_z;
-
- // Store 16x4 block
- vstore16(c00_qs8, 0, (__global char *)(dst_addr + 0 * dst_stride_y));
- vstore16(c10_qs8, 0, (__global char *)(dst_addr + 1 * dst_stride_y));
- vstore16(c20_qs8, 0, (__global char *)(dst_addr + 2 * dst_stride_y));
- vstore16(c30_qs8, 0, (__global char *)(dst_addr + 3 * dst_stride_y));
-}
-
-/** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1) in 16 bit fixed point precision
- * Matrix A and matrix B must be reshaped respectively with @ref gemm_interleave4x4_16bit and @ref gemm_transpose1x8 before running the matrix multiplication
- *
- * @note The number of columns of matrix B and the optional alpha's value need to be passed at compile time using -DCOLS_B and -DALPHA
- * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (i.e. -DMULT_TRANSPOSE1XW_WIDTH=2)
- * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at compile time using -DMULT_INTERLEAVE4X4_HEIGHT (i.e. -DMULT_INTERLEAVE4X4_HEIGHT=2)
- * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid out-of-bounds reads, the number of channels of matrix B must be passed at compile time using MATRIX_B_DEPTH (i.e. -DMATRIX_B_DEPTH=16)
- * This case can happen when GEMM is used to perform the element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have multiple inputs (i.e. a = [K, M, 16, Batches], b = [N, K, 16])
- * @note: ALPHA must be passed in 16 bit fixed point format
- *
- * @param[in] src0_ptr Pointer to the source matrix. Supported data types: QS16
- * @param[in] src0_stride_x Stride of the source matrix in X dimension (in bytes)
- * @param[in] src0_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in bytes)
- * @param[in] src0_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source matrix
- * @param[in] src1_ptr Pointer to the source matrix. Supported data types: same as @p src0_ptr
- * @param[in] src1_stride_x Stride of the source matrix in X dimension (in bytes)
- * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in bytes)
- * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source matrix
- * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src0_ptr
- * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix
- * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in bytes)
- * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in bytes)
- * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- */
-__kernel void gemm_mm_interleaved_transposed_qs16(IMAGE_DECLARATION(src0),
- IMAGE_DECLARATION(src1),
- IMAGE_DECLARATION(dst),
- uint src0_stride_z,
- uint src1_stride_z,
- uint dst_stride_z)
-{
- int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
- int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
- int z = get_global_id(2);
-
- // Offset
- const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
- const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 8;
-
- // src_addr_a = address of matrix A
- // src_addr_b = address of matrix B
- int src0_addr_in_bytes = z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
- int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
-
-#if defined(MATRIX_B_DEPTH)
- // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
- src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
-#else // defined(MATRIX_B_DEPTH)
- src1_addr_in_bytes += z * src1_stride_z;
-#endif // defined(MATRIX_B_DEPTH)
-
- __global short *src_addr_a = (__global short *)(src0_ptr + src0_addr_in_bytes);
- __global short *src_addr_b = (__global short *)(src1_ptr + src1_addr_in_bytes);
-
- // Compute end row address for matrix B
- __global short *src_end_addr_b = src_addr_b + COLS_B;
-
- src_addr_a += offset_row_a;
- src_addr_b += offset_row_b;
-
- // Reset accumulators
- int8 c00 = 0.0f;
- int8 c10 = 0.0f;
- int8 c20 = 0.0f;
- int8 c30 = 0.0f;
-
- // This for loop performs 1 accumulation for each iteration
- for(; src_addr_b < src_end_addr_b; src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH)
- {
- /* Load values from matrix A (interleaved) and matrix B (transposed) */
- short4 a0 = vload4(0, src_addr_a);
- short8 b0 = vload8(0, src_addr_b);
-
- c00 = mlal_sat_qs16x8(c00, (short8)a0.s0, b0, FIXED_POINT_POSITION);
- c10 = mlal_sat_qs16x8(c10, (short8)a0.s1, b0, FIXED_POINT_POSITION);
- c20 = mlal_sat_qs16x8(c20, (short8)a0.s2, b0, FIXED_POINT_POSITION);
- c30 = mlal_sat_qs16x8(c30, (short8)a0.s3, b0, FIXED_POINT_POSITION);
- }
-
- // Compute destination address
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Multiply by the weight of matrix product
- short8 c00_qs16 = convert_short8_sat(c00);
- short8 c10_qs16 = convert_short8_sat(c10);
- short8 c20_qs16 = convert_short8_sat(c20);
- short8 c30_qs16 = convert_short8_sat(c30);
-
-#if defined(ALPHA)
- c00_qs16 = mul_sat_qs16x8(c00_qs16, (short8)ALPHA, FIXED_POINT_POSITION);
- c10_qs16 = mul_sat_qs16x8(c10_qs16, (short8)ALPHA, FIXED_POINT_POSITION);
- c20_qs16 = mul_sat_qs16x8(c20_qs16, (short8)ALPHA, FIXED_POINT_POSITION);
- c30_qs16 = mul_sat_qs16x8(c30_qs16, (short8)ALPHA, FIXED_POINT_POSITION);
-#endif // defined(ALPHA)
-
- // Compute dst address
- __global uchar *dst_addr = offset(&dst, 0, 0);
-
- // Add offset for batched GEMM
- dst_addr += z * dst_stride_z;
-
- // Store 8x4 block
- vstore8(c00_qs16, 0, (__global short *)(dst_addr + 0 * dst_stride_y));
- vstore8(c10_qs16, 0, (__global short *)(dst_addr + 1 * dst_stride_y));
- vstore8(c20_qs16, 0, (__global short *)(dst_addr + 2 * dst_stride_y));
- vstore8(c30_qs16, 0, (__global short *)(dst_addr + 3 * dst_stride_y));
-}
-#endif // defined(FIXED_POINT_POSITION)
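
Both removed interleaved/transposed kernels keep their QS8 products in 16-bit accumulators through mlal_sat_qs8x8 and only narrow back to QS8 (with saturation and the optional ALPHA scale) once the dot products are complete. A scalar model of one such multiply-accumulate step, with the rounding details simplified relative to the OpenCL helpers:

#include <stdint.h>

static int16_t sat_s16(int32_t v)
{
    if(v > INT16_MAX) return INT16_MAX;
    if(v < INT16_MIN) return INT16_MIN;
    return (int16_t)v;
}

/* Widen, multiply, shift back by the fixed-point position, accumulate with saturation. */
int16_t mlal_sat_qs8_scalar(int16_t acc, int8_t a, int8_t b, int fixed_point_position)
{
    int32_t prod = ((int32_t)a * (int32_t)b) >> fixed_point_position;
    return sat_s16((int32_t)acc + prod);
}
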
#endif // defined(COLS_B) && defined(MULT_TRANSPOSE1XW_WIDTH) && defined(MULT_INTERLEAVE4X4_HEIGHT)
#if defined(COLS_A) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && (NUM_ELEMS_PROCESSED_PER_THREAD_Y)
@@ -2543,365 +2297,6 @@ __kernel void gemm_mm_floating_point_f16_bifrost(IMAGE_DECLARATION(src0),
}
#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
-#if defined(FIXED_POINT_POSITION)
-/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and matrix B (src1) in case both matrices have not been reshaped
- *
- * @note This OpenCL kernel works with fixed point data types QS8
- * @note The number of elements processed along the x and y directions must be passed at compile time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y
- * @note The number of matrix A columns, the number of elements processed per thread along the Y direction and the alpha's value need to be passed at compile time using -DCOLS_A, -DNUM_ELEMS_PROCESSED_PER_THREAD_Y and -DALPHA
- * @note The fixed point position needs to be passed at compile time using -DFIXED_POINT_POSITION
- * @note The optional alpha value must be passed in 8 bit fixed point format using -DALPHA
- * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid out-of-bounds reads, the number of channels of matrix B must be passed at compile time using MATRIX_B_DEPTH (i.e. -DMATRIX_B_DEPTH=16)
- * This case can happen when GEMM is used to perform the element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have multiple inputs (i.e. a = [K, M, 16, Batches], b = [N, K, 16])
- *
- * @param[in] src0_ptr Pointer to the source matrix. Supported data types: QS8/QS16
- * @param[in] src0_stride_x Stride of the source matrix in X dimension (in bytes)
- * @param[in] src0_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in bytes)
- * @param[in] src0_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source matrix
- * @param[in] src1_ptr Pointer to the source matrix. Supported data types: same as @p src0_ptr
- * @param[in] src1_stride_x Stride of the source matrix in X dimension (in bytes)
- * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in bytes)
- * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source matrix
- * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src0_ptr
- * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes)
- * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes)
- * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix
- * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in bytes)
- * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in bytes)
- * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- */
-__kernel void gemm_mm_qs8(IMAGE_DECLARATION(src0),
- IMAGE_DECLARATION(src1),
- IMAGE_DECLARATION(dst),
- uint src0_stride_z,
- uint src1_stride_z,
- uint dst_stride_z)
-{
- int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
-
- // Compute starting address for matrix A and Matrix B
- int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
-
- // Update address for the matrix A
- src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
-
- // Update address for the matrix B
- src_addr.s1 += idx * sizeof(char);
-
- // Add offset for batched GEMM
- src_addr.s0 += get_global_id(2) * src0_stride_z;
-
-#if defined(MATRIX_B_DEPTH)
- // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
- src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
-#else // defined(MATRIX_B_DEPTH)
- src_addr.s1 += get_global_id(2) * src1_stride_z;
-#endif // defined(MATRIX_B_DEPTH)
-
- int end_row_vec_a = src_addr.s0 + (COLS_A * sizeof(char));
-
- short8 acc00 = 0;
- short8 acc01 = 0;
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
- short8 acc10 = 0;
- short8 acc11 = 0;
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- short8 acc20 = 0;
- short8 acc21 = 0;
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- short8 acc30 = 0;
- short8 acc31 = 0;
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
-
- // This for loop performs 4 accumulations per iteration
- for(; src_addr.s0 <= (end_row_vec_a - 2); src_addr += (int2)(2, 2 * src1_stride_y))
- {
- char2 a0 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
- char2 a1 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- char2 a2 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- char2 a3 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- char16 b0 = vload16(0, (__global char *)(src1_ptr + src_addr.s1 + 0 * src1_stride_y));
- char16 b1 = vload16(0, (__global char *)(src1_ptr + src_addr.s1 + 1 * src1_stride_y));
-
- acc00 = mlal_sat_qs8x8(acc00, (char8)a0.s0, b0.s01234567, FIXED_POINT_POSITION);
- acc00 = mlal_sat_qs8x8(acc00, (char8)a0.s1, b1.s01234567, FIXED_POINT_POSITION);
- acc01 = mlal_sat_qs8x8(acc01, (char8)a0.s0, b0.s89ABCDEF, FIXED_POINT_POSITION);
- acc01 = mlal_sat_qs8x8(acc01, (char8)a0.s1, b1.s89ABCDEF, FIXED_POINT_POSITION);
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
- acc10 = mlal_sat_qs8x8(acc10, (char8)a1.s0, b0.s01234567, FIXED_POINT_POSITION);
- acc10 = mlal_sat_qs8x8(acc10, (char8)a1.s1, b1.s01234567, FIXED_POINT_POSITION);
- acc11 = mlal_sat_qs8x8(acc11, (char8)a1.s0, b0.s89ABCDEF, FIXED_POINT_POSITION);
- acc11 = mlal_sat_qs8x8(acc11, (char8)a1.s1, b1.s89ABCDEF, FIXED_POINT_POSITION);
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- acc20 = mlal_sat_qs8x8(acc20, (char8)a2.s0, b0.s01234567, FIXED_POINT_POSITION);
- acc20 = mlal_sat_qs8x8(acc20, (char8)a2.s1, b1.s01234567, FIXED_POINT_POSITION);
- acc21 = mlal_sat_qs8x8(acc21, (char8)a2.s0, b0.s89ABCDEF, FIXED_POINT_POSITION);
- acc21 = mlal_sat_qs8x8(acc21, (char8)a2.s1, b1.s89ABCDEF, FIXED_POINT_POSITION);
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- acc30 = mlal_sat_qs8x8(acc30, (char8)a3.s0, b0.s01234567, FIXED_POINT_POSITION);
- acc30 = mlal_sat_qs8x8(acc30, (char8)a3.s1, b1.s01234567, FIXED_POINT_POSITION);
- acc31 = mlal_sat_qs8x8(acc31, (char8)a3.s0, b0.s89ABCDEF, FIXED_POINT_POSITION);
- acc31 = mlal_sat_qs8x8(acc31, (char8)a3.s1, b1.s89ABCDEF, FIXED_POINT_POSITION);
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- }
-
- // Left-over accumulations
- for(; src_addr.s0 < end_row_vec_a; src_addr += (int2)(1, src1_stride_y))
- {
- char a0 = *((__global char *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
- char a1 = *((__global char *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- char a2 = *((__global char *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- char a3 = *((__global char *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- char16 b0 = vload16(0, (__global char *)(src1_ptr + src_addr.s1));
-
- acc00 = mlal_sat_qs8x8(acc00, (char8)a0, b0.s01234567, FIXED_POINT_POSITION);
- acc01 = mlal_sat_qs8x8(acc01, (char8)a0, b0.s89ABCDEF, FIXED_POINT_POSITION);
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
- acc10 = mlal_sat_qs8x8(acc10, (char8)a1, b0.s01234567, FIXED_POINT_POSITION);
- acc11 = mlal_sat_qs8x8(acc11, (char8)a1, b0.s89ABCDEF, FIXED_POINT_POSITION);
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- acc20 = mlal_sat_qs8x8(acc20, (char8)a2, b0.s01234567, FIXED_POINT_POSITION);
- acc21 = mlal_sat_qs8x8(acc21, (char8)a2, b0.s89ABCDEF, FIXED_POINT_POSITION);
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- acc30 = mlal_sat_qs8x8(acc30, (char8)a3, b0.s01234567, FIXED_POINT_POSITION);
- acc31 = mlal_sat_qs8x8(acc31, (char8)a3, b0.s89ABCDEF, FIXED_POINT_POSITION);
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- }
-
- // Compute destination address
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Compute dst address
- __global uchar *dst_addr = offset(&dst, 0, 0);
-
- // Add offset for batched GEMM
- dst_addr += get_global_id(2) * dst_stride_z;
-
- // Multiply by the weight of matrix product and store the result
- char16 acc_qs8;
- acc_qs8 = convert_char16_sat((short16)(acc00, acc01));
-#if defined(ALPHA)
- acc_qs8 = mul_sat_qs8x16(acc_qs8, (char16)ALPHA, FIXED_POINT_POSITION);
-#endif // defined(ALPHA)
- vstore16(acc_qs8, 0, (__global char *)(dst_addr + 0 * dst_stride_y));
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
- acc_qs8 = convert_char16_sat((short16)(acc10, acc11));
-#if defined(ALPHA)
- acc_qs8 = mul_sat_qs8x16(acc_qs8, (char16)ALPHA, FIXED_POINT_POSITION);
-#endif // defined(ALPHA)
- vstore16(acc_qs8, 0, (__global char *)(dst_addr + 1 * dst_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- acc_qs8 = convert_char16_sat((short16)(acc20, acc21));
-#if defined(ALPHA)
- acc_qs8 = mul_sat_qs8x16(acc_qs8, (char16)ALPHA, FIXED_POINT_POSITION);
-#endif // defined(ALPHA)
- vstore16(acc_qs8, 0, (__global char *)(dst_addr + 2 * dst_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- acc_qs8 = convert_char16_sat((short16)(acc30, acc31));
-#if defined(ALPHA)
- acc_qs8 = mul_sat_qs8x16(acc_qs8, (char16)ALPHA, FIXED_POINT_POSITION);
-#endif // defined(ALPHA)
- vstore16(acc_qs8, 0, (__global char *)(dst_addr + 3 * dst_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
-}
-
-/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and matrix B (src1) in case both matrices have not been reshaped
- *
- * @note This OpenCL kernel works with fixed point data types QS16
- * @note The number of elements processed along the x and y directions must be passed at compile time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y
- * @note The number of matrix A columns, the number of elements processed per thread along the Y direction and the alpha's value need to be passed at compile time using -DCOLS_A, -DNUM_ELEMS_PROCESSED_PER_THREAD_Y and -DALPHA
- * @note The fixed point position needs to be passed at compile time using -DFIXED_POINT_POSITION
- * @note The optional alpha value must be passed in 16 bit fixed point format using -DALPHA
- * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid out-of-bounds reads, the number of channels of matrix B must be passed at compile time using MATRIX_B_DEPTH (i.e. -DMATRIX_B_DEPTH=16)
- * This case can happen when GEMM is used to perform the element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have multiple inputs (i.e. a = [K, M, 16, Batches], b = [N, K, 16])
- *
- * @param[in] src0_ptr Pointer to the source matrix. Supported data types: QS8/QS16
- * @param[in] src0_stride_x Stride of the source matrix in X dimension (in bytes)
- * @param[in] src0_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in bytes)
- * @param[in] src0_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source matrix
- * @param[in] src1_ptr Pointer to the source matrix. Supported data types: same as @p src0_ptr
- * @param[in] src1_stride_x Stride of the source matrix in X dimension (in bytes)
- * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in bytes)
- * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source matrix
- * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src0_ptr
- * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes)
- * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes)
- * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix
- * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in bytes)
- * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in bytes)
- * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- */
-__kernel void gemm_mm_qs16(IMAGE_DECLARATION(src0),
- IMAGE_DECLARATION(src1),
- IMAGE_DECLARATION(dst),
- uint src0_stride_z,
- uint src1_stride_z,
- uint dst_stride_z)
-{
- int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
-
- // Compute starting address for matrix A and Matrix B
- int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
-
- // Update address for the matrix A
- src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
-
- // Update address for the matrix B
- src_addr.s1 += idx * sizeof(short);
-
- // Add offset for batched GEMM
- src_addr.s0 += get_global_id(2) * src0_stride_z;
-
-#if defined(MATRIX_B_DEPTH)
- // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
- src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
-#else // defined(MATRIX_B_DEPTH)
- src_addr.s1 += get_global_id(2) * src1_stride_z;
-#endif // defined(MATRIX_B_DEPTH)
-
- int end_row_vec_a = src_addr.s0 + (COLS_A * sizeof(short));
-
- int8 acc0 = 0;
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
- int8 acc1 = 0;
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- int8 acc2 = 0;
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- int8 acc3 = 0;
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
-
- // This for loop performs 4 accumulations per iteration
- for(; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(short)); src_addr += (int2)(2 * sizeof(short), 2 * src1_stride_y))
- {
- short2 a0 = vload2(0, (__global short *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
- short2 a1 = vload2(0, (__global short *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- short2 a2 = vload2(0, (__global short *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- short2 a3 = vload2(0, (__global short *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- short8 b0 = vload8(0, (__global short *)(src1_ptr + src_addr.s1 + 0 * src1_stride_y));
- short8 b1 = vload8(0, (__global short *)(src1_ptr + src_addr.s1 + 1 * src1_stride_y));
-
- acc0 = mlal_sat_qs16x8(acc0, (short8)a0.s0, b0, FIXED_POINT_POSITION);
- acc0 = mlal_sat_qs16x8(acc0, (short8)a0.s1, b1, FIXED_POINT_POSITION);
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
- acc1 = mlal_sat_qs16x8(acc1, (short8)a1.s0, b0, FIXED_POINT_POSITION);
- acc1 = mlal_sat_qs16x8(acc1, (short8)a1.s1, b1, FIXED_POINT_POSITION);
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- acc2 = mlal_sat_qs16x8(acc2, (short8)a2.s0, b0, FIXED_POINT_POSITION);
- acc2 = mlal_sat_qs16x8(acc2, (short8)a2.s1, b1, FIXED_POINT_POSITION);
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- acc3 = mlal_sat_qs16x8(acc3, (short8)a3.s0, b0, FIXED_POINT_POSITION);
- acc3 = mlal_sat_qs16x8(acc3, (short8)a3.s1, b1, FIXED_POINT_POSITION);
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- }
-
- // Left-over accumulations
- for(; src_addr.s0 < end_row_vec_a; src_addr += (int2)(sizeof(short), src1_stride_y))
- {
- short a0 = *((__global short *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
- short a1 = *((__global short *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- short a2 = *((__global short *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- short a3 = *((__global short *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- short8 b0 = vload8(0, (__global short *)(src1_ptr + src_addr.s1));
-
- acc0 = mlal_sat_qs16x8(acc0, (short8)a0, b0, FIXED_POINT_POSITION);
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
- acc1 = mlal_sat_qs16x8(acc1, (short8)a1, b0, FIXED_POINT_POSITION);
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- acc2 = mlal_sat_qs16x8(acc2, (short8)a2, b0, FIXED_POINT_POSITION);
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- acc3 = mlal_sat_qs16x8(acc3, (short8)a3, b0, FIXED_POINT_POSITION);
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- }
-
- // Compute destination address
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Compute dst address
- __global uchar *dst_addr = offset(&dst, 0, 0);
-
- // Add offset for batched GEMM
- dst_addr += get_global_id(2) * dst_stride_z;
-
- // Multiply by the weight of matrix product and store the result
- short8 acc_qs16;
- acc_qs16 = convert_short8_sat(acc0);
-#if defined(ALPHA)
- acc_qs16 = mul_sat_qs16x8(acc_qs16, (short8)ALPHA, FIXED_POINT_POSITION);
-#endif // defined(ALPHA)
- vstore8(acc_qs16, 0, (__global short *)(dst_addr + 0 * dst_stride_y));
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
- acc_qs16 = convert_short8_sat(acc1);
-#if defined(ALPHA)
- acc_qs16 = mul_sat_qs16x8(acc_qs16, (short8)ALPHA, FIXED_POINT_POSITION);
-#endif // defined(ALPHA)
- vstore8(acc_qs16, 0, (__global short *)(dst_addr + 1 * dst_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- acc_qs16 = convert_short8_sat(acc2);
-#if defined(ALPHA)
- acc_qs16 = mul_sat_qs16x8(acc_qs16, (short8)ALPHA, FIXED_POINT_POSITION);
-#endif // defined(ALPHA)
- vstore8(acc_qs16, 0, (__global short *)(dst_addr + 2 * dst_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
-#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- acc_qs16 = convert_short8_sat(acc3);
-#if defined(ALPHA)
- acc_qs16 = mul_sat_qs16x8(acc_qs16, (short8)ALPHA, FIXED_POINT_POSITION);
-#endif // defined(ALPHA)
- vstore8(acc_qs16, 0, (__global short *)(dst_addr + 3 * dst_stride_y));
-#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
-}
-#endif // defined(FIXED_POINT_POSITION)
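
The epilogue of the removed non-reshaped kernels is the same in both widths: the wide accumulator is narrowed with a saturating convert, then optionally scaled by ALPHA through a saturating fixed-point multiply. A per-element C sketch of the QS8 path (rounding simplified, names illustrative):

#include <stdint.h>

static int8_t sat_s8(int32_t v)
{
    if(v > INT8_MAX) return INT8_MAX;
    if(v < INT8_MIN) return INT8_MIN;
    return (int8_t)v;
}

/* Narrow the 16-bit accumulator to QS8, then apply the saturating ALPHA multiply. */
int8_t qs8_epilogue(int16_t acc, int8_t alpha, int fixed_point_position)
{
    int8_t  c    = sat_s8(acc);                                   /* convert_char16_sat, per lane */
    int32_t prod = ((int32_t)c * (int32_t)alpha) >> fixed_point_position;
    return sat_s8(prod);                                          /* mul_sat_qs8x16, per lane */
}
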
#endif // defined(COLS_A) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && (NUM_ELEMS_PROCESSED_PER_THREAD_Y)
#if defined(BETA)
@@ -2988,94 +2383,6 @@ __kernel void gemm_ma_f16(TENSOR3D_DECLARATION(src),
vstore8(out, 0, (__global half *)dst.ptr);
}
#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
-
-#if defined(FIXED_POINT_POSITION)
-/** This OpenCL kernel performs the in-place matrix addition between 2 matrices in 8 bit fixed point taking into account that the second matrix might be weighted by a scalar value beta:
- *
- * @note The beta's value and the fixed point position need to be passed at compile time using -DBETA and -DFIXED_POINT_POSITION
- *
- * @note: BETA must be passed in 8 bit fixed point format
- *
- * @param[in] src_ptr Pointer to the source matrix. Supported data types: QS8
- * @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] src_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source matrix
- * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src_ptr
- * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes)
- * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes)
- * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix
- */
-__kernel void gemm_ma_qs8(TENSOR3D_DECLARATION(src),
- TENSOR3D_DECLARATION(dst))
-{
- // Compute source and destination addresses
- Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
- Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
-
- // Load values from A x B
- char16 alpha_ab = vload16(0, (__global char *)dst.ptr);
-
- // Load values from Matrix C
- char16 c = vload16(0, (__global char *)src.ptr);
-
- // Computes alpha * axb + beta * c
- char16 out = mla_sat_qs8x16(alpha_ab, (char16)BETA, c, FIXED_POINT_POSITION);
-
- // Store final result in axb matrix
- vstore16(out, 0, (__global char *)dst.ptr);
-}
-
-/** This OpenCL kernel performs the in-place matrix addition between 2 matrices in 16 bit fixed point taking into account that the second matrix might be weighted by a scalar value beta:
- *
- * @note The beta's value and the fixed point position need to be passed at compile time using -DBETA and -DFIXED_POINT_POSITION
- *
- * @note: BETA must be passed in 16 bit fixed point format
- *
- * @param[in] src_ptr Pointer to the source matrix. Supported data types: QS16
- * @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] src_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source matrix
- * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src_ptr
- * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes)
- * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes)
- * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix
- */
-__kernel void gemm_ma_qs16(TENSOR3D_DECLARATION(src),
- TENSOR3D_DECLARATION(dst))
-{
- // Compute source and destination addresses
- Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
- Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
-
- // Load values from A x B
- short8 alpha_ab = vload8(0, (__global short *)dst.ptr);
-
- // Load values from Matrix C
- short8 c = vload8(0, (__global short *)src.ptr);
-
- // Computes alpha * axb + beta * c
- short8 out = mla_sat_qs16x8(alpha_ab, (short8)BETA, c, FIXED_POINT_POSITION);
-
- // Store final result in axb matrix
- vstore8(out, 0, (__global short *)dst.ptr);
-}
-#endif // defined(FIXED_POINT_POSITION)
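
The removed gemm_ma_qs8/gemm_ma_qs16 kernels update the A*B result in place with out = alpha_ab + beta * c, where the beta product is a saturating fixed-point multiply. A per-element C sketch of the QS8 case (rounding simplified):

#include <stdint.h>

/* out = alpha_ab + beta * c in QS8; the final result is clamped to the signed 8-bit
   range (the removed mla_sat helpers also saturate the intermediate product). */
int8_t gemm_ma_qs8_elem(int8_t alpha_ab, int8_t beta, int8_t c, int fixed_point_position)
{
    int32_t prod = ((int32_t)beta * (int32_t)c) >> fixed_point_position;
    int32_t sum  = (int32_t)alpha_ab + prod;
    if(sum > INT8_MAX) return INT8_MAX;
    if(sum < INT8_MIN) return INT8_MIN;
    return (int8_t)sum;
}
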
#endif // defined(BETA)
#if defined(WIDTH_VECTOR_A)
@@ -3151,7 +2458,7 @@ __kernel void gemm_lc_vm_f32(IMAGE_DECLARATION(src0),
* @note The data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=short.
* @note The vector size must be passed at compile time using -DVECTOR_SIZE e.g. -DVECTOR_SIZE=16.
*
- * @param[in, out] accum_ptr Pointer to the accumulate tensor. Supported data type: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+ * @param[in, out] accum_ptr Pointer to the accumulate tensor. Supported data type: U8/S8/U16/S16/F16/U32/S32/F32
 * @param[in] accum_stride_x Stride of the accumulate tensor in X dimension (in bytes)
* @param[in] accum_step_x accum_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] accum_stride_y Stride of the accumulate tensor in Y dimension (in bytes)
@@ -3175,11 +2482,7 @@ __kernel void gemm_accumulate_biases(
accum_value = VLOAD(VECTOR_SIZE)(0, (__global DATA_TYPE *)accum.ptr);
VEC_DATA_TYPE(DATA_TYPE, VECTOR_SIZE)
biases_value = VLOAD(VECTOR_SIZE)(0, (__global DATA_TYPE *)biases.ptr);
-#ifdef FIXED_POINT_POSITION
- accum_value = ADD_SAT_OP_EXPAND(biases_value, accum_value, DATA_TYPE, VECTOR_SIZE);
-#else // FIXED_POINT_POSITION
- accum_value = biases_value + accum_value;
-#endif // FIXED_POINT_POSITION
+ accum_value = biases_value + accum_value;
// Store result in the accumulate buffer
VSTORE(VECTOR_SIZE)
(accum_value, 0, (__global DATA_TYPE *)accum.ptr);
diff --git a/src/core/CL/cl_kernels/im2col.cl b/src/core/CL/cl_kernels/im2col.cl
index 6f25ad4b7a..d034b30b68 100644
--- a/src/core/CL/cl_kernels/im2col.cl
+++ b/src/core/CL/cl_kernels/im2col.cl
@@ -23,12 +23,7 @@
*/
#include "helpers.h"
-#if defined(FIXED_POINT_POSITION)
-#include "fixed_point.h"
-#endif // FIXED_POINT_POSITION
-
#if defined(DATA_TYPE) && defined(ELEMENT_SIZE)
-#if !defined(FIXED_POINT_POSITION)
#if ELEMENT_SIZE == 1
#define COND_DATA_TYPE char
@@ -50,7 +45,7 @@
* @note The stride along the Y direction must be passed at compile time using -DSTRIDE_Y: e.g. -DSTRIDE_Y=1
* @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -139,7 +134,7 @@ __kernel void im2col1x1_stridex1_dchw(
* @note The dilation_x and dilation_y must be passed at compile time using -DDILATION_X and -DDILATION_Y: e.g. -DDILATION_X=1, -DDILATION_Y=1
* @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -232,7 +227,7 @@ __kernel void im2col_generic_nhwc(
* @note The stride along the X and Y directions must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y: e.g. -DSTRIDE_X=1 and -DSTRIDE_Y=1
* @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -338,7 +333,7 @@ __kernel void im2col3x3_nhwc(
* @note The stride along the X and Y directions must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y: e.g. -DSTRIDE_X=1 and -DSTRIDE_Y=1
* @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -425,7 +420,7 @@ __kernel void im2col3x3_dchw(
* @note The stride along the X and Y directions must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y: e.g. -DSTRIDE_X=1 and -DSTRIDE_Y=1
* @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -598,7 +593,7 @@ __kernel void im2col5x5_dchw(
* @note The stride along the X and Y directions must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y: e.g. -DSTRIDE_X=1 and -DSTRIDE_Y=1
* @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -788,7 +783,6 @@ __kernel void im2col11x11_padx0_pady0_dchw(
#endif // HAS_BIAS
}
#endif // defined(CONVOLVED_WIDTH) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(KERNEL_DEPTH)
-#endif // !defined(FIXED_POINT_POSITION)
#if defined(CONVOLVED_WIDTH) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(KERNEL_DEPTH) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(VECTOR_SIZE) && defined(WIDTH_MOD_VECTOR_SIZE)
/** This kernel reshapes the input tensor to a tensor used to perform convolution using GEMM when
@@ -799,7 +793,7 @@ __kernel void im2col11x11_padx0_pady0_dchw(
* @note The width modulo vector size must be passed at compile time using -DWIDTH_MOD_VECTOR_SIZE e.g. -DWIDTH_MOD_VECTOR_SIZE=3.
* @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -863,11 +857,7 @@ __kernel void im2col_generic_padx0_pady0_dchw(
#ifdef HAS_BIAS
if(ch == (KERNEL_DEPTH - 1))
{
-#ifdef FIXED_POINT_POSITION
- *output_ptr = (DATA_TYPE)(1 << FIXED_POINT_POSITION);
-#else // FIXED_POINT_POSITION
*output_ptr = 1.0f;
-#endif // FIXED_POINT_POSITION
}
#endif // HAS_BIAS
}
@@ -886,7 +876,7 @@ __kernel void im2col_generic_padx0_pady0_dchw(
* @note The dilation_x and dilation_y must be passed at compile time using -DDILATION_X and -DDILATION_Y: e.g. -DDILATION_X=1, -DDILATION_Y=1
* @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -950,11 +940,7 @@ __kernel void im2col_generic_dchw(
#ifdef HAS_BIAS
if(ch == (KERNEL_DEPTH - 1))
{
-#ifdef FIXED_POINT_POSITION
- *output_ptr = (DATA_TYPE)(1 << FIXED_POINT_POSITION);
-#else // FIXED_POINT_POSITION
*output_ptr = 1.0f;
-#endif // FIXED_POINT_POSITION
}
#endif // HAS_BIAS
}
@@ -966,7 +952,7 @@ __kernel void im2col_generic_dchw(
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
 * @note In case biases will be added at a late stage, -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QASYMM8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -999,11 +985,7 @@ __kernel void im2col_reduced_dchw(
if(get_global_id(0) == (get_global_size(0) - 1) && get_global_id(1) == (get_global_size(1) - 1) && get_global_id(2) == (get_global_size(2) - 1))
{
tmp_out_ptr += dst_stride_x;
-#ifdef FIXED_POINT_POSITION
- *((__global DATA_TYPE *)tmp_out_ptr) = (DATA_TYPE)(1 << FIXED_POINT_POSITION);
-#else // FIXED_POINT_POSITION
*((__global DATA_TYPE *)tmp_out_ptr) = (DATA_TYPE)1.0f;
-#endif // FIXED_POINT_POSITION
}
#endif // HAS_BIAS
}
diff --git a/src/core/CL/cl_kernels/l2_normalize.cl b/src/core/CL/cl_kernels/l2_normalize.cl
index 8d47631019..f58e98bace 100644
--- a/src/core/CL/cl_kernels/l2_normalize.cl
+++ b/src/core/CL/cl_kernels/l2_normalize.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,11 +28,11 @@
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
* @note The data size must be passed at compile time using -DDATA_SIZE e.g. -DDATA_SIZE=32
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] sum_ptr Pointer to the source tensor. Supported data types: QS8/F16/F32
+ * @param[in] sum_ptr Pointer to the source tensor. Supported data types: F16/F32
* @param[in] sum_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] sum_step_x sum_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] sum_offset_first_element_in_bytes The offset of the first element in the source tensor
diff --git a/src/core/CL/cl_kernels/normalization_layer.cl b/src/core/CL/cl_kernels/normalization_layer.cl
index bc00252fbd..dbdad27865 100644
--- a/src/core/CL/cl_kernels/normalization_layer.cl
+++ b/src/core/CL/cl_kernels/normalization_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,22 +23,6 @@
*/
#include "helpers.h"
-#if defined(FIXED_POINT_POSITION)
-
-#include "fixed_point.h"
-#define MUL_OP(x, y) MUL_SAT_OP_EXPAND((x), (y), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-#define ADD_OP(x, y) ADD_SAT_OP_EXPAND((x), (y), DATA_TYPE, VEC_SIZE)
-#define DIV_OP(x, y) DIV_SAT_OP_VEC_EXPAND((x), (y), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-#define EXP_OP(x) EXP_OP_EXPAND((x), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-#define LOG_OP(x) LOG_OP_EXPAND((x), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-#define POW_OP(x, y) EXP_OP(MUL_OP(LOG_OP((x)), (y)))
-#define SQCVT_SAT(a) SQCVT_SAT_OP_EXPAND((a), DATA_TYPE, FIXED_POINT_POSITION)
-
-#define LOAD_OP(offset, ptr) vload16(offset, ptr)
-#define STORE_OP(data, offset, ptr) vstore16(data, offset, ptr)
-
-#else // FIXED_POINT_POSITION
-
#define MUL_OP(x, y) ((x) * (y))
#define ADD_OP(x, y) ((x) + (y))
#define DIV_OP(x, y) ((x) / (y))
@@ -48,18 +32,15 @@
#define LOAD_OP(offset, ptr) vload4(offset, ptr)
#define STORE_OP(data, offset, ptr) vstore4(data, offset, ptr)
-#endif // FIXED_POINT_POSITION
-
/** Apply cross-map normalization.
*
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
* @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16
* @note The radius should be given as a preprocessor argument using -DRADIUS=size. e.g. -DRADIUS=5
* @note The number of slices should be given as a preprocessor argument using -DNUM_SLICES=size. e.g. -DNUM_SLICES=192
- * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3
* @note Scaling coefficient (= alpha/norm_size), beta and kappa need to be passed at compile time using -DCOEFF, -DALPHA and -DKAPPA
*
- * @param[in] input_ptr Pointer to the first source tensor. Supported data types: QS8/QS16/F16/F32
+ * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes)
@@ -116,10 +97,9 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input),
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
* @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16
* @note The radius should be given as a preprocessor argument using -DRADIUS=size. e.g. -DRADIUS=5
- * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3
* @note Scaling coefficient (= alpha/norm_size), beta and kappa need to be passed at compile time using -DCOEFF, -DALPHA and -DKAPPA
*
- * @param[in] input_ptr Pointer to the first source tensor. Supported data types: QS8/F16/F32
+ * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes)
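With the fixed-point branch deleted, MUL_OP/ADD_OP/DIV_OP/EXP_OP/LOG_OP/POW_OP collapse to plain floating-point arithmetic, so cross-map normalization is just each element divided by (KAPPA + COEFF * sum-of-squares)^BETA over the neighbouring maps, with COEFF = alpha/norm_size as stated in the kernel notes. A minimal C++ sketch of that float-only path (function and variable names are illustrative, not the kernel's):

// --- illustrative sketch, not part of the patch ---
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Float-only cross-map (LRN) normalisation of one channel vector, mirroring the
// surviving MUL/ADD/DIV/POW macro path: out = x / (kappa + coeff * sum_sq)^beta.
std::vector<float> lrn_cross_map(const std::vector<float> &x, int radius,
                                 float coeff, float beta, float kappa)
{
    const int n = static_cast<int>(x.size());
    std::vector<float> out(n);
    for(int c = 0; c < n; ++c)
    {
        float sum_sq = 0.f;
        for(int k = std::max(0, c - radius); k <= std::min(n - 1, c + radius); ++k)
        {
            sum_sq += x[k] * x[k]; // MUL_OP then ADD_OP
        }
        out[c] = x[c] / std::pow(kappa + coeff * sum_sq, beta); // POW_OP then DIV_OP
    }
    return out;
}

int main()
{
    for(float v : lrn_cross_map({1.f, 2.f, 3.f, 4.f}, 2, 0.0001f, 0.75f, 1.f))
    {
        std::printf("%f ", v);
    }
    std::printf("\n");
}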
diff --git a/src/core/CL/cl_kernels/permute.cl b/src/core/CL/cl_kernels/permute.cl
index 6f978c9b70..03fc15e4e8 100644
--- a/src/core/CL/cl_kernels/permute.cl
+++ b/src/core/CL/cl_kernels/permute.cl
@@ -29,7 +29,7 @@
* @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
* @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size. e.g. -DDEPTH_IN=16
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
@@ -63,7 +63,7 @@ __kernel void permute_201(
* @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
* @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size. e.g. -DDEPTH_IN=16
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
@@ -97,7 +97,7 @@ __kernel void permute_120(
* @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
* @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size. e.g. -DDEPTH_IN=16
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+ * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
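The permute kernels only lose QS8/QS16 from their supported-type lists; the index shuffling itself is type-agnostic and unchanged. As a reminder of what such a kernel does, here is a small, hedged C++ sketch that applies a permutation vector to a 3D coordinate (a generic illustration, not the kernel's exact indexing):

// --- illustrative sketch, not part of the patch ---
#include <array>
#include <cstdio>

// Apply a dimension permutation to a 3D coordinate: out[i] = in[perm[i]].
// The element data type never enters the picture, which is why only the
// supported-type lists in the documentation change.
std::array<int, 3> permute_coord(const std::array<int, 3> &in, const std::array<int, 3> &perm)
{
    return { in[perm[0]], in[perm[1]], in[perm[2]] };
}

int main()
{
    const auto p = permute_coord({/*x=*/4, /*y=*/7, /*z=*/2}, {2, 0, 1});
    std::printf("%d %d %d\n", p[0], p[1], p[2]); // 2 4 7
}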
diff --git a/src/core/CL/cl_kernels/pixelwise_mul_int.cl b/src/core/CL/cl_kernels/pixelwise_mul_int.cl
index b5734a39ed..c99a08a583 100644
--- a/src/core/CL/cl_kernels/pixelwise_mul_int.cl
+++ b/src/core/CL/cl_kernels/pixelwise_mul_int.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,18 +23,6 @@
*/
#include "helpers.h"
-#if defined(FIXED_POINT_POSITION)
-
-#include "fixed_point.h"
-
-#if defined(SATURATE)
-#define MUL_OP(x, y, scale, type, size) MUL_SAT_OP_EXPAND((x), (y), type, size, FIXED_POINT_POSITION)
-#else // SATURATE
-#define MUL_OP(x, y, scale, type, size) MUL_OP_EXPAND((x), (y), type, size, FIXED_POINT_POSITION)
-#endif // SATURATE
-
-#else // FIXED_POINT_POSITION
-
#if defined(SATURATE)
#define CONVERT_OP_INT_STR(x, type, size) (convert_##type##size##_sat(x))
#else // SATURATE
@@ -44,17 +32,14 @@
#define MUL_OP(x, y, scale, type, size) CONVERT_OP_INT((x) * (y) >> scale, type, size)
-#endif // FIXED_POINT_POSITION
-
/** Performs a pixelwise multiplication with integer scale of integer inputs.
*
* @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
* e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=ushort -DDATA_TYPE_OUT=short
 * @attention The data_type of the intermediate result of the multiplication should be passed as well using -DDATA_TYPE_RES.
 * e.g. If one of the inputs is S16, -DDATA_TYPE_RES=int should be passed, else -DDATA_TYPE_RES=short.
- * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3
*
- * @param[in] in1_ptr Pointer to the source image. Supported data types: U8/QS8/QS16/S16
+ * @param[in] in1_ptr Pointer to the source image. Supported data types: U8/S16
* @param[in] in1_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes)
@@ -78,7 +63,7 @@
 * @param[in] out_stride_z Stride of the destination image in Z dimension (in bytes)
 * @param[in] out_step_z out_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] scale Integer scaling factor. Supported data types: S32 (ignored for QS8 and QS16 as the assumption is scale = 1).
+ * @param[in] scale Integer scaling factor. Supported data types: S32.
*/
__kernel void pixelwise_mul_int(
TENSOR3D_DECLARATION(in1),
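Integer pixelwise multiplication now always takes the plain path shown above: multiply in a wider intermediate type, shift right by the scale exponent, then convert with or without saturation depending on -DSATURATE. A short C++ sketch of that arithmetic for U8 inputs and a saturated S16 result (the helper name is illustrative):

// --- illustrative sketch, not part of the patch ---
#include <algorithm>
#include <cstdint>
#include <cstdio>

// Multiply in a wide intermediate type, shift by 'scale', then saturate to int16_t,
// mirroring MUL_OP(x, y, scale, type, size) = CONVERT_OP_INT((x) * (y) >> scale, ...).
int16_t pixelwise_mul_int_sat(uint8_t a, uint8_t b, int scale)
{
    const int32_t res = (static_cast<int32_t>(a) * static_cast<int32_t>(b)) >> scale;
    return static_cast<int16_t>(std::min<int32_t>(std::max<int32_t>(res, INT16_MIN), INT16_MAX));
}

int main()
{
    std::printf("%d\n", pixelwise_mul_int_sat(200, 180, /*scale=*/0)); // saturates to 32767
    std::printf("%d\n", pixelwise_mul_int_sat(200, 180, /*scale=*/4)); // 2250
}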
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index 2c7ddfdf23..c38a78ce3e 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -23,28 +23,6 @@
*/
#include "helpers.h"
-#ifdef FIXED_POINT_POSITION
-
-#include "fixed_point.h"
-
-#if defined(POOL_AVG)
-#define POOL_OP(x, y) add_sat(x, y)
-#else /* POOL_AVG */
-#define POOL_OP(x, y) (max((x), (y)))
-#endif /* POOL_AVG */
-
-#define DIV_OP1(x, y) DIV_SAT_OP_EXPAND((x), (y), DATA_TYPE, FIXED_POINT_POSITION)
-#define DIV_OP(x, y) DIV_OP1(x, y << FIXED_POINT_POSITION)
-#define SQRT_OP(x) DIV_OP1((1 << FIXED_POINT_POSITION), (INVSQRT_OP_EXPAND((x), DATA_TYPE, 1, FIXED_POINT_POSITION)))
-
-#if defined(POOL_L2)
-#define POW2_OP(x, vec_size) MUL_SAT_OP_EXPAND((x), (x), DATA_TYPE, vec_size, FIXED_POINT_POSITION)
-#else /* defined(POOL_L2) */
-#define POW2_OP(x, vec_size) (x)
-#endif /* defined(POOL_L2) */
-
-#else /* FIXED_POINT_POSITION */
-
#if defined(POOL_AVG) || defined(POOL_L2)
#define POOL_OP(x, y) ((x) + (y))
#else /* defined(POOL_AVG) || defined(POOL_L2) */
@@ -60,8 +38,6 @@
#define DIV_OP(x, y) (x * (1.f / y))
#define SQRT_OP(x) sqrt((x))
-#endif /* FIXED_POINT_POSITION */
-
#define DIV_OP_NHWC(x, y) (x * (VEC_DATA_TYPE(DATA_TYPE, 8))(1.f / y))
#if STRIDE_X == 1
@@ -201,14 +177,14 @@ DATA_TYPE calculate_avg_scale(const int pool_size_x, const int pool_size_y, cons
/** Performs a pooling function of pool size equal to 2.
*
- * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
* @note In case of average pooling the following information must be passed at compile time:
* -DPOOL_AVG or -DPOOL_L2 must be provided otherwise max pooling will be performed.
 * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indices in x and y dimensions (width + pad)
* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32
+ * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
@@ -265,14 +241,14 @@ __kernel void pooling_layer_2(
/** Performs a pooling function of pool size equal to 3
*
- * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
* @note In case of average pooling the following information must be passed at compile time:
* -DPOOL_AVG or -DPOOL_L2 must be provided otherwise max pooling will be performed.
 * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indices in x and y dimensions (width + pad)
* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32
+ * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
@@ -331,7 +307,7 @@ __kernel void pooling_layer_3(
*(__global DATA_TYPE *)output.ptr = res;
}
-#if defined(POOLING3x3) && !defined(FIXED_POINT_POSITION)
+#if defined(POOLING3x3)
#define CONVERT_OP(data_type) convert_##data_type##4
#define CONVERT_VECTOR4(data_type) CONVERT_OP(data_type)
@@ -353,7 +329,7 @@ calculate_avg_scale4(const int pool_size, const int upper_bound_w, const int upp
/** Performs an optimized pooling function of pool size equal to 3 when stride_x is less than or equal to 3
*
- * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
* @note In case of average pooling the following information must be passed at compile time:
* -DPOOL_AVG or -DPOOL_L2 must be provided otherwise max pooling will be performed.
 * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indices in x and y dimensions (width + pad)
@@ -403,7 +379,7 @@ __kernel void pooling_layer_optimized_3(
vstore4(res, 0, (__global DATA_TYPE *)output.ptr);
}
-#endif // defined(POOLING3x3) && !defined(FIXED_POINT_POSITION)
+#endif // defined(POOLING3x3)
#if defined(POOL_SIZE_X) && defined(POOL_SIZE_Y)
@@ -411,23 +387,17 @@ __kernel void pooling_layer_optimized_3(
#if defined(POOL_AVG) || defined(POOL_L2)
#define INITIAL_VALUE 0
#else /* defined(POOL_AVG) || defined(POOL_L2) */
-#ifdef FIXED_POINT_POSITION
-#define MIN_VAL_EXPAND(type) type##_MIN
-#define MIN_VAL(type) MIN_VAL_EXPAND(type)
-#define INITIAL_VALUE MIN_VAL(DATA_TYPE)
-#else // FIXED_POINT_POSITION
#if FP16
#define INITIAL_VALUE -HALF_MAX
#else // FP16
#define INITIAL_VALUE -FLT_MAX
#endif // FP16
-#endif // FIXED_POINT_POSITION
#endif // POOL_AVG
/** Performs a pooling function of pool size equal to N (NCHW)
*
- * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
* @note -DFP16 must be passed at compile time if half float data type is used
* @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;
* @note In case of average pooling the following information must be passed at compile time:
@@ -436,7 +406,7 @@ __kernel void pooling_layer_optimized_3(
* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32
+ * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
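With the fixed-point block gone, max pooling always starts its accumulator from the float limits (-HALF_MAX for F16, -FLT_MAX for F32) and average/L2 pooling from zero, and the final division is a plain multiply by the reciprocal of the window size. A small C++ sketch of one pooling window in the float-only style (names are illustrative):

// --- illustrative sketch, not part of the patch ---
#include <algorithm>
#include <cfloat>
#include <cstdio>
#include <vector>

// Pool one window of float values, mirroring the surviving float-only macros:
// max pooling starts from -FLT_MAX (the old DATA_TYPE##_MIN path is gone),
// average pooling accumulates from 0 and scales by 1/size.
float pool_window(const std::vector<float> &w, bool average)
{
    float acc = average ? 0.f : -FLT_MAX; // INITIAL_VALUE
    for(float v : w)
    {
        acc = average ? acc + v : std::max(acc, v); // POOL_OP
    }
    return average ? acc * (1.f / w.size()) : acc; // DIV_OP
}

int main()
{
    const std::vector<float> window = {0.5f, -2.f, 3.f, 1.5f};
    std::printf("max=%f avg=%f\n", pool_window(window, false), pool_window(window, true));
}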
diff --git a/src/core/CL/cl_kernels/reshape_layer.cl b/src/core/CL/cl_kernels/reshape_layer.cl
index 23eccbf817..11393d246d 100644
--- a/src/core/CL/cl_kernels/reshape_layer.cl
+++ b/src/core/CL/cl_kernels/reshape_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,7 +27,7 @@
*
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
*
- * @param[in] input_ptr Pointer to the first source tensor. Supported data types: U8/S8/QS8/U16/S16/QS16/U32/S32/F16/F32
+ * @param[in] input_ptr Pointer to the first source tensor. Supported data types: U8/S8/U16/S16/U32/S32/F16/F32
* @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/softmax_layer.cl b/src/core/CL/cl_kernels/softmax_layer.cl
index aa1fa01c53..e549b44245 100644
--- a/src/core/CL/cl_kernels/softmax_layer.cl
+++ b/src/core/CL/cl_kernels/softmax_layer.cl
@@ -23,23 +23,6 @@
*/
#include "helpers.h"
-#ifdef FIXED_POINT_POSITION
-
-#include "fixed_point.h"
-#define MAX_OP(x, y, type, size) MAX_OP_EXPAND(x, y, type, size)
-#define ADD_OP(x, y, type, size) ADD_SAT_OP_EXPAND((x), (y), type, size)
-#define SUB_OP(x, y, type, size) SUB_SAT_OP_EXPAND((x), (y), type, size)
-#define MUL_OP(x, y, type, size) MUL_SAT_OP_EXPAND((x), (y), type, size, FIXED_POINT_POSITION)
-#define DIV_OP(x, y, type, size) DIV_SAT_OP_VEC_EXPAND((x), (y), type, size, FIXED_POINT_POSITION)
-#define EXP_OP(x, type, size) EXP_OP_EXPAND((x), type, size, FIXED_POINT_POSITION)
-
-#define MIN_VAL_EXPAND(type) type##_MIN
-#define MIN_VAL(type) MIN_VAL_EXPAND(type)
-#define MINVAL MIN_VAL(DATA_TYPE)
-#define SELECT_DATA_TYPE EXPAND(DATA_TYPE)
-
-#else /* FIXED_POINT_POSITION */
-
#define MAX_OP(x, y, type, size) max((x), (y))
#define ADD_OP(x, y, type, size) ((x) + (y))
#define SUB_OP(x, y, type, size) ((x) - (y))
@@ -55,8 +38,6 @@
#define SELECT_DATA_TYPE int
#endif /* USE_F16 */
-#endif /* FIXED_POINT_POSITION */
-
/* Number of workitems in dimension 0. */
#if !defined(GRID_SIZE)
#define GRID_SIZE 1
@@ -91,9 +72,8 @@ __constant uint4 idx4 = (uint4)(0, 1, 2, 3);
/** Divides all the values of the input tensor by the sum calculated from softmax_layer_shift_exp_sum kernel.
*
* @note Datatype must be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
- * @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. DFIXED_POINT_POSITION=4
*
- * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -138,11 +118,10 @@ __kernel void softmax_layer_norm(
 * then gets the exponent of each element and sums all elements across each row.
*
* @note Datatype must be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
- * @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. DFIXED_POINT_POSITION=4
* @note In case the input is not a multiple of VECTOR_SIZE (2,4,8,16) -DNON_MULTIPLE_OF_VECTOR_SIZE must be passed.
* @note Beta can be optionally passed at compile time using -DBETA (by default, it is 1.0).
*
- * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -288,11 +267,10 @@ __kernel void softmax_layer_max_shift_exp_sum_serial(
 * then gets the exponent of each element and sums all elements across each row.
*
* @note Datatype must be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
- * @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. DFIXED_POINT_POSITION=4
* @note In case the input is not a multiple of VECTOR_SIZE (2,4,8,16) -DNON_MULTIPLE_OF_VECTOR_SIZE must be passed.
* @note Beta can be optionally passed at compile time using -DBETA (by default, it is 1.0).
*
- * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
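The softmax kernels now keep only the float macro set, so the pipeline (row maximum, shifted exponentials plus their sum, then the per-element division in softmax_layer_norm) is ordinary floating-point arithmetic. A compact C++ sketch of those stages on a single row, ignoring BETA and vectorisation:

// --- illustrative sketch, not part of the patch ---
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// max-shift-exp-sum followed by the normalisation step, as in the float-only path.
std::vector<float> softmax_row(std::vector<float> row)
{
    const float max_val = *std::max_element(row.begin(), row.end()); // MAX_OP reduction
    float sum = 0.f;
    for(float &v : row)
    {
        v = std::exp(v - max_val); // SUB_OP then EXP_OP
        sum += v;                  // ADD_OP
    }
    for(float &v : row)
    {
        v /= sum; // softmax_layer_norm: DIV_OP by the accumulated sum
    }
    return row;
}

int main()
{
    for(float v : softmax_row({1.f, 2.f, 3.f}))
    {
        std::printf("%f ", v);
    }
    std::printf("\n");
}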
diff --git a/src/core/CL/cl_kernels/softmax_layer_quantized.cl b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
index c055381fc5..95d6d4bcc5 100644
--- a/src/core/CL/cl_kernels/softmax_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
@@ -230,10 +230,9 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_serial(
 * then gets the exponent of each element and sums all elements across each row.
*
* @note Datatype must be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
- * @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. DFIXED_POINT_POSITION=4
* @note In case the input is not a multiple of VECTOR_SIZE (2,4,8,16) -DNON_MULTIPLE_OF_VECTOR_SIZE must be passed.
*
- * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -519,7 +518,6 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_parallel(
/** Divides all the values of the input tensor by the sum calculated from softmax_layer_shift_exp_sum kernel.
*
- * @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. DFIXED_POINT_POSITION=4
* @note Quantized beta can be optionally passed at compile time using -DINPUT_BETA_MULTIPLIER and -DINPUT_BETA_LEFT_SHIFT (if undefined, assume beta equals 1.0)
 * @note -DDIFF_MIN must be passed at compile time. It is the threshold difference between the maximum value of the input data and the current processed value; it defines whether the value will be taken into account or not.
*
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 3d8824aa2a..1ae1032cba 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -27,7 +27,6 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
@@ -47,7 +46,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->data_type() == DataType::QASYMM8) && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
&& (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU)
&& (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU),
@@ -58,7 +57,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
@@ -118,7 +116,6 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
const DataType dt = input->info()->data_type();
- const int fixed_point_position = input->info()->fixed_point_position();
float a_const = act_info.a();
float b_const = act_info.b();
int a_const_int = 0;
@@ -127,16 +124,8 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
// Create quantized version of constants a, b if needed
if(is_data_type_quantized(dt))
{
- if(is_data_type_fixed_point(dt))
- {
- a_const_int = static_cast<int>(lround(a_const * (1 << fixed_point_position)));
- b_const_int = static_cast<int>(lround(b_const * (1 << fixed_point_position)));
- }
- else
- {
- a_const_int = input->info()->quantization_info().quantize(a_const, RoundingPolicy::TO_NEAREST_UP);
- b_const_int = input->info()->quantization_info().quantize(b_const, RoundingPolicy::TO_NEAREST_UP);
- }
+ a_const_int = input->info()->quantization_info().quantize(a_const, RoundingPolicy::TO_NEAREST_UP);
+ b_const_int = input->info()->quantization_info().quantize(b_const, RoundingPolicy::TO_NEAREST_UP);
}
// Set build options
@@ -177,10 +166,6 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
}
build_opts.emplace((_run_in_place) ? "-DIN_PLACE" : "");
- if(is_data_type_fixed_point(dt))
- {
- build_opts.emplace(("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(fixed_point_position)));
- }
// Create kernel
std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? std::string("activation_layer_qa8") : std::string("activation_layer");
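In CLActivationLayerKernel the quantized path is now QASYMM8 only, so the a/b constants always go through the tensor's QuantizationInfo instead of the old fixed-point shift. As a rough sketch of what that mapping amounts to, assuming the usual asymmetric scheme q = round(value / scale) + offset clamped to [0, 255] (an illustration of the idea, not the library call itself):

// --- illustrative sketch, not part of the patch ---
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Assumed QASYMM8 quantisation: q = clamp(round(value / scale) + offset, 0, 255).
// The kernel now always uses a mapping of this kind for a_const/b_const; the
// "value * (1 << fixed_point_position)" branch has been deleted.
uint8_t quantize_qasymm8(float value, float scale, int offset)
{
    const int q = static_cast<int>(std::lround(value / scale)) + offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main()
{
    // e.g. a BOUNDED_RELU upper bound a = 6.0 with scale 0.1 and offset 10
    std::printf("%d\n", quantize_qasymm8(6.0f, 0.1f, 10)); // 70
}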
diff --git a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp
index 011807ad88..78651f8679 100644
--- a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp
+++ b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp
@@ -37,9 +37,9 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
{
ARM_COMPUTE_UNUSED(policy);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input2);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
const bool is_qasymm = is_data_type_quantized_asymmetric(input1.data_type()) || is_data_type_quantized_asymmetric(input2.data_type());
if(is_qasymm)
@@ -50,18 +50,16 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(&input1, &input2);
// Validate in case of configured output
if(output.total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((output.data_type() == DataType::U8) && ((input1.data_type() != DataType::U8) || (input2.data_type() != DataType::U8)),
"Output can only be U8 if both inputs are U8");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
"Wrong shape for output");
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(&input1, &output);
if(is_qasymm)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &output);
@@ -142,11 +140,7 @@ void CLArithmeticAdditionKernel::configure(const ICLTensor *input1, const ICLTen
build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type()));
build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type()));
build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
- if(is_data_type_fixed_point(input1->info()->data_type()))
- {
- build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input1->info()->fixed_point_position()));
- }
- else if(is_data_type_quantized_asymmetric(input1->info()->data_type()))
+ if(is_data_type_quantized_asymmetric(input1->info()->data_type()))
{
build_opts.emplace("-DOFFSET=" + support::cpp11::to_string(input1->info()->quantization_info().offset));
kernel_name += "_quantized";
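Validation for the addition kernel is reduced to the type lists plus the broadcast-compatibility check on the two input shapes; the fixed-point checks disappear. Broadcast compatibility here means each dimension pair must either match or have one side equal to 1, which is the rule TensorShape::broadcast_shape encodes. A hedged, standalone C++ sketch of that rule (not the library function):

// --- illustrative sketch, not part of the patch ---
#include <algorithm>
#include <cstdio>
#include <vector>

// Two shapes broadcast together if, dimension by dimension, the sizes are equal
// or one of them is 1 (missing dimensions count as 1).
bool broadcast_compatible(const std::vector<int> &a, const std::vector<int> &b)
{
    const size_t n = std::max(a.size(), b.size());
    for(size_t i = 0; i < n; ++i)
    {
        const int da = i < a.size() ? a[i] : 1;
        const int db = i < b.size() ? b[i] : 1;
        if(da != db && da != 1 && db != 1)
        {
            return false;
        }
    }
    return true;
}

int main()
{
    std::printf("%d\n", broadcast_compatible({16, 8, 3}, {16, 1, 3})); // 1
    std::printf("%d\n", broadcast_compatible({16, 8, 3}, {16, 4, 3})); // 0
}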
diff --git a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp
index db91bc0084..aeee6022a7 100644
--- a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp
+++ b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,21 +44,19 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
{
ARM_COMPUTE_UNUSED(policy);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input2);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, input2);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input1, input2);
// Validate in case of configured output
if((output != nullptr) && (output->total_size() != 0))
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::U8 && (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8),
"Output can only be U8 if both inputs are U8");
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input1, output);
}
return Status{};
@@ -122,10 +120,6 @@ void CLArithmeticSubtractionKernel::configure(const ICLTensor *input1, const ICL
build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type()));
build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type()));
build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
- if(is_data_type_fixed_point(input1->info()->data_type()))
- {
- build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input1->info()->fixed_point_position()));
- }
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("arithmetic_sub", build_opts));
diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
index 391baef96a..5999c66056 100644
--- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
@@ -27,7 +27,6 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
@@ -46,22 +45,19 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
{
ARM_COMPUTE_UNUSED(epsilon);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, var);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, var);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, mean, var);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)) != mean->dimension(0));
if(beta != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, beta);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, beta);
}
if(gamma != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, gamma);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, gamma);
}
if(act_info.enabled())
@@ -78,7 +74,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
@@ -168,7 +163,6 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out
build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
build_opts.add_option_if(_run_in_place, "-DIN_PLACE");
- build_opts.add_option_if(is_data_type_fixed_point(input->info()->data_type()), "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
build_opts.add_option_if(beta == nullptr, "-DUSE_DEFAULT_BETA");
build_opts.add_option_if(gamma == nullptr, "-DUSE_DEFAULT_GAMMA");
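Batch normalization keeps only F16/F32, with the optional beta/gamma tensors signalled through -DUSE_DEFAULT_BETA and -DUSE_DEFAULT_GAMMA. The arithmetic itself is the usual per-channel normalisation; a small C++ sketch with the defaults spelled out (beta = 0 and gamma = 1 are assumptions of the sketch, matching the common convention):

// --- illustrative sketch, not part of the patch ---
#include <cmath>
#include <cstdio>

// out = gamma * (x - mean) / sqrt(var + epsilon) + beta
// With USE_DEFAULT_BETA / USE_DEFAULT_GAMMA the additive and multiplicative
// terms drop out, i.e. beta = 0 and gamma = 1 in this sketch.
float batch_norm(float x, float mean, float var, float epsilon,
                 float gamma = 1.f, float beta = 0.f)
{
    return gamma * (x - mean) / std::sqrt(var + epsilon) + beta;
}

int main()
{
    std::printf("%f\n", batch_norm(2.f, 1.f, 4.f, 0.001f));            // defaults
    std::printf("%f\n", batch_norm(2.f, 1.f, 4.f, 0.001f, 0.5f, 1.f)); // explicit gamma/beta
}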
diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
index 1de987264c..5f0f0aebf8 100644
--- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
+++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
@@ -39,8 +39,8 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8,
- DataType::U16, DataType::S16, DataType::QS16,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8,
+ DataType::U16, DataType::S16,
DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups < 2, "Channel shuffling with less than 2 groups would be inefficient");
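Channel shuffle likewise only trims its supported-type list, since the operation is a pure index permutation. For reference, with G groups over C channels the usual gather rule reads output channel c from input channel (c mod G) * (C / G) + c / G; a tiny C++ sketch of that rule (illustrative, not the kernel code):

// --- illustrative sketch, not part of the patch ---
#include <cstdio>
#include <vector>

// Channel shuffle with 'groups' groups: output channel c gathers from
// input channel (c % groups) * (channels / groups) + c / groups.
std::vector<int> shuffle_sources(int channels, int groups)
{
    std::vector<int> src(channels);
    const int k = channels / groups;
    for(int c = 0; c < channels; ++c)
    {
        src[c] = (c % groups) * k + c / groups;
    }
    return src;
}

int main()
{
    for(int s : shuffle_sources(6, 2))
    {
        std::printf("%d ", s); // 0 3 1 4 2 5
    }
    std::printf("\n");
}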
diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp
index 64e6a0b7d8..6274c9082a 100644
--- a/src/core/CL/kernels/CLCol2ImKernel.cpp
+++ b/src/core/CL/kernels/CLCol2ImKernel.cpp
@@ -44,14 +44,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, s
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
// Checks performed when output is configured
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), compute_col2im_shape(*input, convolved_dims));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
}
@@ -64,7 +63,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
// Output auto initialization if not yet initialized
auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_col2im_shape(*input, convolved_dims)));
- const unsigned int num_elems_read_per_iteration = is_data_type_fixed_point(input->data_type()) ? 1 : 8;
+ const unsigned int num_elems_read_per_iteration = 8;
// Configure window
Window win = calculate_max_window(*input, Steps(num_elems_read_per_iteration));
@@ -106,7 +105,6 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p
build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input->info()->element_size()));
build_opts.add_option("-DWIDTH_INPUT=" + support::cpp11::to_string(input->info()->dimension(0)));
build_opts.add_option("-DWIDTH_OUTPUT=" + support::cpp11::to_string(_convolved_dims.first));
- build_opts.add_option_if(is_data_type_fixed_point(data_type), "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("col2im", build_opts.options()));
diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
index c3cd494662..a39d1f4a0b 100644
--- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
@@ -75,7 +75,7 @@ Status CLConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *input,
DataLayout data_layout)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::QS16, DataType::U32, DataType::S32,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::U32, DataType::S32,
DataType::QS32, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
index 204f9aed6f..72dc21197d 100644
--- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -62,9 +62,8 @@ void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i
};
ARM_COMPUTE_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2));
ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0));
ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1));
diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
index 83908a1469..2f5b2466b1 100644
--- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,21 +40,15 @@ using namespace arm_compute;
void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16,
- DataType::U16, DataType::U32, DataType::S32, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16,
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16,
+ DataType::U16, DataType::U32, DataType::S32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16,
DataType::U16, DataType::U32, DataType::S32, DataType::F32);
ARM_COMPUTE_ERROR_ON(input == output);
ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == output->info()->data_type(), "Input and output data types must be different");
ARM_COMPUTE_ERROR_ON(shift >= 8);
// Check if conversion is supported
- ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && output->info()->data_type() != DataType::F32,
- "Only data types supported [in] QS8 -> [out] F32");
- ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::F32),
- "Only data types supported [in] QS16 -> [out] F32");
- ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && ((output->info()->data_type() != DataType::QS8) && output->info()->data_type() != DataType::QS16),
- "Only data types supported [in] F32 -> [out] QS8, QS16");
ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::U16 && output->info()->data_type() != DataType::S16
&& output->info()->data_type() != DataType::U32 && output->info()->data_type() != DataType::S32),
"Only data types supported [in] U8 -> [out] U16, S16, U32, S32");
@@ -99,10 +93,6 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out
}
build_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
- if(is_data_type_fixed_point(input->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type()))
- {
- build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
- }
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
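With the QS8/QS16 conversions removed, CLDepthConvertLayerKernel converts only between the integer types listed above (plus F32 on the output side), with an optional shift of less than 8. A minimal C++ sketch of a widening conversion such as U8 -> S16; it assumes the shift is applied as a left shift on the widened value, which is an assumption of the sketch rather than something stated in the diff:

// --- illustrative sketch, not part of the patch ---
#include <cstdint>
#include <cstdio>

// Widening depth conversion with a small shift (shift < 8), e.g. U8 -> S16.
// Assumption: the shift acts as a left shift on the widened value.
// QS8/QS16 conversions to/from F32 are no longer part of the kernel.
int16_t depth_convert_u8_to_s16(uint8_t v, uint32_t shift)
{
    return static_cast<int16_t>(static_cast<int16_t>(v) << shift);
}

int main()
{
    std::printf("%d\n", depth_convert_u8_to_s16(200, 0)); // 200
    std::printf("%d\n", depth_convert_u8_to_s16(200, 4)); // 3200
}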
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 1de08aa1a2..9d9c280182 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -146,7 +146,6 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
output_shape,
1,
input->info()->data_type(),
- input->info()->fixed_point_position(),
input->info()->quantization_info());
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info));
diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
index bef13f9b1c..cab943629a 100644
--- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
@@ -53,7 +53,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input->data_type()) && has_bias);
ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(idx_c) * depth_multiplier) != output->dimension(2));
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != (kernel_dims.width * kernel_dims.height + ((has_bias) ? 1 : 0)));
diff --git a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
index c97ecaf8e0..e124ee42f3 100644
--- a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
@@ -61,7 +61,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, s
TensorShape output_shape = compute_output_shape(input->tensor_shape(), conv_w, conv_h, output->data_layout());
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
diff --git a/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp
index fd3b75484a..c28be3fccf 100644
--- a/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp
@@ -46,7 +46,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input->data_type()) && (biases != nullptr));
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_c) != output->dimension(1));
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != (input->dimension(idx_w) * input->dimension(idx_h) + ((biases != nullptr) ? 1 : 0)));
@@ -54,7 +53,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
if(biases != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != input->dimension(idx_c));
ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
}
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index fa982d6cf2..fba721f50b 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -54,7 +54,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *min_max)
{
// Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32, 0);
+ auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32);
constexpr unsigned int num_elems_processed_per_iteration = 4;
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index d2794d7abd..dcb4ac1c5d 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -45,7 +45,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(0) != weights->dimension(1),
"Weights should have same width as length");
@@ -84,7 +84,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(),
misc::shape_calculator::compute_deep_convolution_shape(*input, *weights, conv_info));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
@@ -103,7 +102,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
auto_init_if_empty(*output, output_shape,
1,
input->data_type(),
- input->fixed_point_position(),
input->quantization_info());
unsigned int conv_stride_x = std::get<0>(conv_info.stride());
@@ -265,7 +263,6 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
output_shape,
1,
input->info()->data_type(),
- input->info()->fixed_point_position(),
input->info()->quantization_info());
// Perform validation step
@@ -302,18 +299,14 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
}
else
{
- bool is_quantized_fixed_point = is_data_type_fixed_point(data_type);
- bool is_quantized_asymm = is_data_type_quantized_asymmetric(data_type);
- DataType promoted_type = (is_quantized_fixed_point) ? get_promoted_data_type(data_type) : data_type;
+ bool is_quantized_asymm = is_data_type_quantized_asymmetric(data_type);
build_options.add_option_if(is_quantized_asymm, std::string("-DKERNEL_SIZE=" + support::cpp11::to_string(kernel_size)));
build_options.add_option(std::string("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)));
build_options.add_option(std::string("-DDATA_SIZE=" + get_data_size_from_data_type(data_type)));
build_options.add_option(std::string("-DWEIGHTS_DEPTH=" + support::cpp11::to_string(_weights->info()->dimension(2))));
build_options.add_option(std::string("-DSTRIDE_X=" + support::cpp11::to_string(_conv_stride_x)));
- build_options.add_option_if(is_quantized_fixed_point,
- std::string("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())));
- build_options.add_option(std::string("-DDATA_TYPE_PROMOTED=" + get_cl_type_from_data_type(promoted_type)));
+ build_options.add_option(std::string("-DDATA_TYPE_PROMOTED=" + get_cl_type_from_data_type(data_type)));
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(is_quantized_asymm ? "direct_convolution_1x1_3x3_5x5_quantized" : kernel_name.str(),
diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp
index 66504e67b5..3b1edaf46c 100644
--- a/src/core/CL/kernels/CLFillBorderKernel.cpp
+++ b/src/core/CL/kernels/CLFillBorderKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -91,10 +91,6 @@ void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, Bo
build_opts.emplace(("-DBORDER_SIZE_BOTTOM=" + support::cpp11::to_string(border_size.bottom)));
build_opts.emplace(("-DBORDER_SIZE_LEFT=" + support::cpp11::to_string(border_size.left)));
build_opts.emplace(("-DBORDER_SIZE_RIGHT=" + support::cpp11::to_string(border_size.right)));
- if(is_data_type_fixed_point(tensor->info()->data_type()))
- {
- build_opts.emplace("-DFIXED_POINT_POSITION");
- }
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
@@ -125,14 +121,12 @@ void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, Bo
case DataType::QASYMM8:
set_constant_border<uint8_t>(idx, constant_border_value);
break;
- case DataType::QS8:
case DataType::S8:
set_constant_border<int8_t>(idx, constant_border_value);
break;
case DataType::U16:
set_constant_border<uint16_t>(idx, constant_border_value);
break;
- case DataType::QS16:
case DataType::S16:
set_constant_border<int16_t>(idx, constant_border_value);
break;
diff --git a/src/core/CL/kernels/CLFloorKernel.cpp b/src/core/CL/kernels/CLFloorKernel.cpp
index 11f8e33319..f6b0e829a0 100644
--- a/src/core/CL/kernels/CLFloorKernel.cpp
+++ b/src/core/CL/kernels/CLFloorKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,7 @@ void CLFloorKernel::configure(const ICLTensor *input, ICLTensor *output)
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Auto initialize output
- auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
diff --git a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
index ba475f5819..12a40cd7dc 100644
--- a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
+++ b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
@@ -44,15 +44,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i
{
ARM_COMPUTE_RETURN_ERROR_ON(mult_interleave4x4_height < 1);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::U8, DataType::S8,
- DataType::QS16, DataType::U16, DataType::S16, DataType::U32, DataType::S32,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::U8, DataType::S8,
+ DataType::U16, DataType::S16, DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), compute_interleaved_shape(*input, mult_interleave4x4_height));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
index 3f705ac0a7..e040122663 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
@@ -172,7 +172,7 @@ void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const IC
tensor_shape.set(0, is_interleaved_transposed ? reshape_info.n() : input1->info()->dimension(0));
tensor_shape.set(1, is_interleaved_transposed ? reshape_info.m() : input0->info()->dimension(1));
- auto_init_if_empty(*output->info(), tensor_shape, 1, DataType::S32, 1, QuantizationInfo());
+ auto_init_if_empty(*output->info(), tensor_shape, 1, DataType::S32, QuantizationInfo());
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info));
diff --git a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
index 81e455fce8..04cf627818 100644
--- a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
@@ -41,9 +41,8 @@ namespace
Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(accum);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(biases, accum);
ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() != 1);
return Status{};
@@ -95,8 +94,6 @@ void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTe
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(accum->info()->data_type()));
build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size));
- build_opts.add_option_if(is_data_type_fixed_point(accum->info()->data_type()),
- "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(accum->info()->fixed_point_position()));
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemm_accumulate_biases", build_opts.options()));
diff --git a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
index c50ee24a70..bcc3a01296 100644
--- a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
@@ -29,7 +29,6 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Window.h"
@@ -64,7 +63,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, f
ARM_COMPUTE_UNUSED(input, output, beta);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
@@ -88,19 +87,7 @@ void CLGEMMMatrixAdditionKernel::configure(const ICLTensor *input, ICLTensor *ou
_output = output;
std::ostringstream ma_arguments;
- if(is_data_type_fixed_point(input->info()->data_type()))
- {
- ma_arguments << "-DBETA=" << (input->info()->data_type() == DataType::QS8 ?
- sqcvt_qs8_f32(beta, input->info()->fixed_point_position()) :
- sqcvt_qs16_f32(beta, input->info()->fixed_point_position()))
- << " ";
- ma_arguments << "-DFIXED_POINT_POSITION=" << input->info()->fixed_point_position();
- }
- else
- {
- ma_arguments << "-DBETA=" << beta;
- }
-
+ ma_arguments << "-DBETA=" << beta;
std::set<std::string> build_opts;
build_opts.emplace(ma_arguments.str());
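With the QS8/QS16 branch removed, beta reaches the OpenCL program as a plain float define. A rough stand-alone model of the remaining build-option assembly; the `std::set`/`ostringstream` usage mirrors the call site, but nothing here is the actual CL compilation path:

```cpp
#include <set>
#include <sstream>
#include <string>

// Beta is always emitted as a float define; no fixed-point conversion branch remains.
std::set<std::string> make_build_opts(float beta)
{
    std::ostringstream ma_arguments;
    ma_arguments << "-DBETA=" << beta;

    std::set<std::string> build_opts;
    build_opts.emplace(ma_arguments.str());
    return build_opts;
}

int main()
{
    const std::set<std::string> opts = make_build_opts(0.5f);
    return opts.count("-DBETA=0.5") == 1 ? 0 : 1;
}
```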
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index 2c2a92d070..814cbb631f 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -31,7 +31,6 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
@@ -53,10 +52,8 @@ inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *i
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input0);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, input1);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_fixed_point(input0->data_type()) && (reshape_info.depth_output_gemm3d() != 1), "GEMM3D only supports floating point data types");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input0->num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3, "The number of dimensions for the matrix B must be <= 3");
@@ -95,7 +92,6 @@ inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *i
const TensorInfo tensor_info_output = output->clone()->set_tensor_shape(compute_mm_shape(*input0, *input1, is_interleaved_transposed, reshape_info));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, output);
}
return Status{};
@@ -219,7 +215,6 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
_slide_matrix_b = _input1->info()->num_dimensions() >= _input0->info()->num_dimensions();
const DataType data_type = input0->info()->data_type();
- const int fp_pos = input0->info()->fixed_point_position();
// Get target architecture
GPUTarget gpu_target = get_target();
@@ -236,14 +231,11 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
// Create build options
CLBuildOptions build_opts;
- build_opts.add_option_if(is_data_type_fixed_point(data_type), "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(fp_pos));
// Only define ALPHA when alpha is not 1.0f. This avoids performing unnecessary multiplications.
if(std::abs(1.0f - alpha) > 0.00001f)
{
- build_opts.add_option_if_else(is_data_type_fixed_point(data_type),
- "-DALPHA=" + support::cpp11::to_string((data_type == DataType::QS8 ? sqcvt_qs8_f32(alpha, fp_pos) : sqcvt_qs16_f32(alpha, fp_pos))),
- "-DALPHA=" + float_to_string_with_full_precision(alpha));
+ build_opts.add_option("-DALPHA=" + float_to_string_with_full_precision(alpha));
}
build_opts.add_option_if(_is_gemm3d, "-DREINTERPRET_OUTPUT_AS_3D");
build_opts.add_option_if(_is_gemm3d, "-DHEIGHT_GEMM3D=" + support::cpp11::to_string(output->info()->dimension(1)));
@@ -299,10 +291,6 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
// via exhaustive autotuning over a range of representative layer configurations.
_lws_hint = cl::NDRange(4);
}
- else if(is_data_type_fixed_point(data_type))
- {
- kernel_name = "gemm_mm_" + lower_string(string_from_data_type(data_type));
- }
else // (MIDGARD and F32) or (F16)
{
kernel_name = "gemm_mm_floating_point";
diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
index d8ecd501b0..43a6cf25db 100644
--- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
@@ -42,7 +42,6 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input0);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, input1, output);
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input0->data_type()) && (output->data_type() != DataType::S32));
ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(2) != input1->dimension(1));
diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
index 7a8a1e529d..7e44fa7118 100644
--- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
@@ -47,8 +47,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i
{
ARM_COMPUTE_RETURN_ERROR_ON(mult_transpose1xW_width < 1);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::U8, DataType::S8,
- DataType::QS16, DataType::U16, DataType::S16, DataType::U32, DataType::S32,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::U8, DataType::S8,
+ DataType::U16, DataType::S16, DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
if(output->total_size() != 0)
@@ -56,7 +56,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(),
compute_transpose1xW_with_element_size_shape(*input, mult_transpose1xW_width));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 5d4e039e94..b54575ae30 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -48,7 +48,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, bool has_bias, const Size2D &dilation)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
@@ -58,7 +58,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, b
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
@@ -136,7 +135,7 @@ CLIm2ColKernel::configure_window(const ICLTensor *input, ICLTensor *output, cons
if(dilation == Size2D(1U, 1U))
{
- if(squared_im2col && !is_data_type_fixed_point(data_type))
+ if(squared_im2col)
{
// Check if we can run an optimized im2col
switch(kernel_dims.width)
@@ -304,7 +303,6 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)));
build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input->info()->element_size()));
build_opts.add_option_if(has_bias, "-DHAS_BIAS");
- build_opts.add_option_if(is_data_type_fixed_point(data_type), "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
_num_elems_processed_per_iteration = 1;
diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
index 3d30350c59..39d9f958d3 100644
--- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
@@ -26,7 +26,6 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
@@ -78,7 +77,7 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
// Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type(), input->fixed_point_position());
+ auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type());
AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
index 60dd5e7de3..9493ddc878 100644
--- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
@@ -62,7 +62,7 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
TensorShape output_shape = compute_min_max_shape(input);
// Output auto initialization if not yet initialized
- auto_init_if_empty(*output, output_shape, 1, input->data_type(), input->fixed_point_position());
+ auto_init_if_empty(*output, output_shape, 1, input->data_type());
const unsigned int num_elems_processed_per_iteration = 1;
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
index 5456876ee8..df01eab240 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,7 +27,6 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
@@ -40,24 +39,16 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
- if(is_data_type_fixed_point(input->data_type()))
- {
- ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.beta(), input);
- ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.kappa(), input);
- ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.scale_coeff(), input);
- }
-
// Checks performed when output is configured
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
@@ -74,7 +65,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
const unsigned int border_width = is_in_map ? std::min(norm_size / 2, 3U) : 0;
const BorderSize border_size = BorderSize(0, border_width);
- const unsigned int num_elems_processed_per_iteration = (is_data_type_fixed_point(input->data_type())) ? 16 : 4;
+ const unsigned int num_elems_processed_per_iteration = 4;
const unsigned int num_elems_read_per_iteration = is_in_map ? (num_elems_processed_per_iteration + 2 * (norm_size / 2)) : num_elems_processed_per_iteration;
Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
@@ -119,14 +110,12 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou
const unsigned int border_width = _is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0;
_border_size = BorderSize(0, border_width);
- const unsigned int num_elems_processed_per_iteration = (is_data_type_fixed_point(input->info()->data_type())) ? 16 : 4;
+ const unsigned int num_elems_processed_per_iteration = 4;
const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D);
// Set build options
CLBuildOptions build_opts;
build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- build_opts.add_option_if(is_data_type_fixed_point(input->info()->data_type()),
- "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
build_opts.add_option(("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff())));
build_opts.add_option(("-DBETA=" + float_to_string_with_full_precision(norm_info.beta())));
build_opts.add_option(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa())));
diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp
index 168ab81088..7c0c95be1c 100644
--- a/src/core/CL/kernels/CLPermuteKernel.cpp
+++ b/src/core/CL/kernels/CLPermuteKernel.cpp
@@ -52,8 +52,8 @@ TensorShape get_output_shape(const ITensorInfo *input, const PermutationVector &
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8,
- DataType::U16, DataType::S16, DataType::QS16,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8,
+ DataType::U16, DataType::S16,
DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((perm != PermutationVector{ 2U, 0U, 1U })
@@ -68,7 +68,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
}
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
index a9df36dfcc..4ea093fe04 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
@@ -51,36 +51,23 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
ARM_COMPUTE_UNUSED(rounding_policy);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input2);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(scale < 0, "Scale cannot be negative.");
const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input1, input2);
-
- if(is_data_type_fixed_point(input1->data_type()))
- {
- // All data types must be all QS8 or all QS16
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(scale != 1, "Unsupported scaling factor for QS8/QS16. Scale must be 1.");
- }
// Validate in case of configured output
if(output->total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::U8 && (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8),
"Output can only be U8 if both inputs are U8");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), "Wrong shape for output");
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input1, output);
- if(is_data_type_fixed_point(input1->data_type()))
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output);
- }
}
return Status{};
@@ -174,14 +161,6 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I
{
compute_type = "int";
}
- else if(input1->info()->data_type() == DataType::QS8)
- {
- compute_type = "qs8";
- }
- else if(input1->info()->data_type() == DataType::QS16)
- {
- compute_type = "qs16";
- }
else
{
compute_type = "ushort";
@@ -197,10 +176,6 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I
std::set<std::string> build_opts;
build_opts.emplace((overflow_policy == ConvertPolicy::WRAP || is_data_type_float(output->info()->data_type())) ? "-DWRAP" : "-DSATURATE");
build_opts.emplace((rounding_policy == RoundingPolicy::TO_ZERO) ? "-DROUND=_rtz" : "-DROUND=_rte");
- if(is_data_type_fixed_point(input1->info()->data_type()))
- {
- build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input1->info()->fixed_point_position()));
- }
build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type()));
build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type()));
build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 81c52ed53b..246ab68130 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -62,7 +62,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
switch(data_layout)
{
case DataLayout::NCHW:
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
break;
case DataLayout::NHWC:
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
@@ -78,8 +78,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
- TensorInfo out_info(TensorInfo(compute_pool_shape(*input, pool_info), 1, output->data_type(), output->fixed_point_position()));
+ TensorInfo out_info(TensorInfo(compute_pool_shape(*input, pool_info), 1, output->data_type()));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
}
@@ -214,8 +213,6 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
build_opts.add_option("-DPOOL_" + string_from_pooling_type(pool_type));
- build_opts.add_option_if(is_data_type_fixed_point(data_type),
- "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
build_opts.add_option("-DSTRIDE_X=" + support::cpp11::to_string(pool_stride_x));
build_opts.add_option("-DSTRIDE_Y=" + support::cpp11::to_string(pool_stride_y));
build_opts.add_option("-DPAD_X=" + support::cpp11::to_string(pool_pad_left));
@@ -240,7 +237,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
{
// Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenCL kernel where
// each thread computes 4 output elements
- const bool is_pool3x3_stride_le3 = (pool_size_x == 3) && (pool_size_y == 3) && (pool_stride_x <= 3) && !is_data_type_fixed_point(data_type);
+ const bool is_pool3x3_stride_le3 = (pool_size_x == 3) && (pool_size_y == 3) && (pool_stride_x <= 3);
std::string kernel_name = ((is_pool3x3_stride_le3) ? "pooling_layer_optimized_" : "pooling_layer_")
+ support::cpp11::to_string(pool_size_x);
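The optimized 3x3 pooling path is now chosen purely from geometry (3x3 pool with stride_x <= 3), with the fixed-point exclusion dropped. Sketch of the resulting kernel-name selection; `std::to_string` stands in for `support::cpp11::to_string`:

```cpp
#include <string>

// Optimized kernel is used only for 3x3 pooling with stride_x <= 3.
std::string pooling_kernel_name(int pool_size_x, int pool_size_y, int pool_stride_x)
{
    const bool is_pool3x3_stride_le3 = (pool_size_x == 3) && (pool_size_y == 3) && (pool_stride_x <= 3);
    return (is_pool3x3_stride_le3 ? std::string("pooling_layer_optimized_")
                                  : std::string("pooling_layer_"))
           + std::to_string(pool_size_x);
}

int main()
{
    return (pooling_kernel_name(3, 3, 2) == "pooling_layer_optimized_3"
            && pooling_kernel_name(5, 5, 2) == "pooling_layer_5") ? 0 : 1;
}
```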
diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
index 028e50821f..af751f4832 100644
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
@@ -54,7 +54,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *min_max)
{
// Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::U8, 0);
+ auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::U8);
constexpr unsigned int num_elems_processed_per_iteration = 4;
diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
index 51873ff66a..4048e927f5 100644
--- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
@@ -56,7 +56,7 @@ void CLROIPoolingLayerKernel::configure(const ICLTensor *input, const ICLROIArra
// Output auto inizialitation if not yet initialized
TensorShape output_shape(pool_info.pooled_width(), pool_info.pooled_height(), input->info()->dimension(2), rois->num_values());
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) != pool_info.pooled_width()) || (output->info()->dimension(1) != pool_info.pooled_height()));
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index c44fced3e3..d64f0d89c5 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -27,7 +27,6 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
@@ -65,7 +64,7 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
// Output tensor auto initialization if not yet initialized
TensorShape output_shape{ input->tensor_shape() };
output_shape.set(axis, 1);
- auto_init_if_empty(*output, output_shape, 1, input->data_type(), input->fixed_point_position());
+ auto_init_if_empty(*output, output_shape, 1, input->data_type());
const unsigned int num_elems_processed_per_iteration = 16;
@@ -118,10 +117,6 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
std::set<std::string> build_opts;
build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
- if(is_data_type_fixed_point(input->info()->data_type()))
- {
- build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
- }
switch(op)
{
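The reduction output descriptor is still derived by collapsing the reduced axis to 1; only the fixed-point position argument disappears from the auto-init call. An illustrative shape helper under those assumptions, with a plain `std::vector` standing in for `TensorShape`:

```cpp
#include <cstddef>
#include <vector>

// Copy the input shape and collapse the reduced axis to 1.
std::vector<std::size_t> reduction_output_shape(std::vector<std::size_t> input_shape, std::size_t axis)
{
    if(axis < input_shape.size())
    {
        input_shape[axis] = 1;
    }
    return input_shape;
}

int main()
{
    const std::vector<std::size_t> out = reduction_output_shape({ 8, 4, 2 }, 0);
    return (out[0] == 1 && out[1] == 4 && out[2] == 2) ? 0 : 1;
}
```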
diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp
index 15897c9dd7..ce9d7fff67 100644
--- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,12 +47,11 @@ CLReshapeLayerKernel::CLReshapeLayerKernel()
void CLReshapeLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8,
- DataType::U16, DataType::S16, DataType::QS16,
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8,
+ DataType::U16, DataType::S16,
DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
ARM_COMPUTE_ERROR_ON(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size());
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index 6a18e5ffce..b9ebdc9583 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -82,11 +82,10 @@ CLBuildOptions prepare_quantized_softmax_build_options(float input_scale, float
Status validate_arguments_1DMaxShiftExpSum(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(max, sum, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, max);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, max);
const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(input->data_type());
@@ -102,7 +101,6 @@ Status validate_arguments_1DMaxShiftExpSum(const ITensorInfo *input, const ITens
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
}
// Checks performed when sum is configured
@@ -117,7 +115,6 @@ Status validate_arguments_1DMaxShiftExpSum(const ITensorInfo *input, const ITens
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(max, sum);
}
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(max, sum);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(max, sum);
}
return Status{};
@@ -126,10 +123,9 @@ Status validate_arguments_1DMaxShiftExpSum(const ITensorInfo *input, const ITens
Status validate_arguments_1DNorm(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::S32, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(sum, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, sum);
// Note: output should always have a scale of 1/256 and offset 0
const QuantizationInfo allowed_quantization_info = QuantizationInfo(1.f / 256, 0);
@@ -139,7 +135,6 @@ Status validate_arguments_1DNorm(const ITensorInfo *input, const ITensorInfo *su
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
if(!is_quantized_asymmetric)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -239,15 +234,11 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
const DataType dt = input->info()->data_type();
const size_t reduction_dim_size = input->info()->dimension(0);
- auto beta_int = static_cast<int>(lround(beta * (1 << input->info()->fixed_point_position())));
// Set build options
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dt));
- build_opts.add_option_if(is_data_type_fixed_point(dt),
- "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
build_opts.add_option_if(dt == DataType::F16, "-DUSE_F16");
- build_opts.add_option_if(is_data_type_fixed_point(dt) && (beta != 1.0f), "-DBETA=" + support::cpp11::to_string(beta_int));
build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta));
build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options());
@@ -364,8 +355,6 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su
// Set build options
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.add_option_if(is_data_type_fixed_point(input->info()->data_type()),
- "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
build_opts.add_options_if(is_quantized_asymmetric,
prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options());
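After removing the `beta_int` path, only floating-point inputs get a `-DBETA` define (and only when beta != 1.0f), while quantized-asymmetric inputs keep their separate quantized build options. A rough model of that selection; `std::to_string` is just a placeholder for the full-precision formatter:

```cpp
#include <string>
#include <vector>

enum class DataType { QASYMM8, F16, F32 };

bool is_data_type_float(DataType dt)
{
    return dt == DataType::F16 || dt == DataType::F32;
}

// Emit -DBETA only for float types and only when beta is not the default 1.0f.
std::vector<std::string> softmax_beta_opts(DataType dt, float beta)
{
    std::vector<std::string> opts;
    if(is_data_type_float(dt) && beta != 1.0f)
    {
        opts.push_back("-DBETA=" + std::to_string(beta));
    }
    return opts;
}

int main()
{
    return (softmax_beta_opts(DataType::F32, 2.0f).size() == 1
            && softmax_beta_opts(DataType::QASYMM8, 2.0f).empty()) ? 0 : 1;
}
```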
diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp
index 8260606a7d..3d584345d7 100644
--- a/src/core/CL/kernels/CLTransposeKernel.cpp
+++ b/src/core/CL/kernels/CLTransposeKernel.cpp
@@ -57,8 +57,8 @@ TensorShape transposed_tensor_shape(const TensorShape &in)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8,
- DataType::U16, DataType::S16, DataType::QS16,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8,
+ DataType::U16, DataType::S16,
DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
@@ -68,7 +68,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
index b012d58d59..5243c4099e 100644
--- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
@@ -42,13 +42,12 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, c
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
if(biases != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input->data_type()));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
ARM_COMPUTE_RETURN_ERROR_ON((input->num_dimensions() == 4) && (biases->num_dimensions() != 1));
ARM_COMPUTE_RETURN_ERROR_ON((input->num_dimensions() == 5) && (biases->num_dimensions() != 2));
ARM_COMPUTE_RETURN_ERROR_ON((input->num_dimensions() == 4) && (biases->dimension(0) != input->tensor_shape()[3]));
@@ -60,7 +59,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, c
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), compute_weights_reshaped_shape(*input, biases != nullptr));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
}
@@ -96,7 +94,6 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
build_opts.add_option_if(biases != nullptr, "-DHAS_BIAS");
- build_opts.add_option_if(is_data_type_fixed_point(data_type), "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
// Create kernel
std::string kernel_name = std::string("reshape_to_columns_") + lower_string(string_from_data_layout(data_layout));
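The bias-shape rules kept by the weights-reshape validation: 4D weights expect a 1D bias whose length equals the fourth weights dimension, and 5D weights expect a 2D bias. A small sketch of those rules as a plain function; the vector shapes are stand-ins, not the library's `TensorShape`:

```cpp
#include <cstddef>
#include <vector>

bool biases_shape_is_valid(const std::vector<std::size_t> &weights_shape,
                           const std::vector<std::size_t> &biases_shape)
{
    if(weights_shape.size() == 4)
    {
        return biases_shape.size() == 1 && biases_shape[0] == weights_shape[3];
    }
    if(weights_shape.size() == 5)
    {
        return biases_shape.size() == 2;
    }
    return true; // other ranks are not constrained by the checks shown in this hunk
}

int main()
{
    return (biases_shape_is_valid({ 3, 3, 64, 128 }, { 128 })
            && !biases_shape_is_valid({ 3, 3, 64, 128 }, { 64 })) ? 0 : 1;
}
```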
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
index 56d6ec8f16..587ba690c2 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
@@ -60,10 +60,9 @@ Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, c
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::QS16, DataType::F16, DataType::U32,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::F16, DataType::U32,
DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) + width_offset > output->dimension(0));
for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i)
diff --git a/src/core/CPP/kernels/CPPPermuteKernel.cpp b/src/core/CPP/kernels/CPPPermuteKernel.cpp
index 5c93f3e93a..17eaec2670 100644
--- a/src/core/CPP/kernels/CPPPermuteKernel.cpp
+++ b/src/core/CPP/kernels/CPPPermuteKernel.cpp
@@ -40,8 +40,8 @@ namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8,
- DataType::U16, DataType::S16, DataType::QS16,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8,
+ DataType::U16, DataType::S16,
DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(perm.num_dimensions() > 4, "Only up to 4D permutation vectors are supported");
@@ -53,7 +53,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
diff --git a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
index 828782338c..874c3368a5 100644
--- a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
@@ -55,11 +55,10 @@ void GCActivationLayerKernel::configure(IGCTensor *input, IGCTensor *output, Act
if(output != nullptr)
{
// Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
_output = output;
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
index 9a592dfe00..c745f3ff3c 100644
--- a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
@@ -48,27 +48,23 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, var);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, mean, var);
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(mean, var);
if(output->total_size() != 0)
{
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
if(beta != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, beta);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, beta);
}
if(gamma != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, gamma);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, gamma);
}
if(act_info.enabled())
{
@@ -86,7 +82,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
ITensorInfo *beta, ITensorInfo *gamma)
{
// Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type(), input->fixed_point_position());
+ auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type());
unsigned int num_elems_processed_per_iteration = 1;
if(input->data_type() == DataType::F16)
diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
index c2374096a2..a0d1876315 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -69,8 +69,7 @@ void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, con
auto_init_if_empty(*output->info(),
output_shape,
1,
- input->info()->data_type(),
- input->info()->fixed_point_position());
+ input->info()->data_type());
ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
ARM_COMPUTE_ERROR_ON(output->info()->dimension(2) != weights->info()->dimension(2));
diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
index 67a1530431..ecff233382 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
@@ -80,12 +80,11 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
output_shape.set(2, weights->info()->dimension(3));
// Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
ARM_COMPUTE_ERROR_ON(!conv_info.padding_is_symmetric());
_conv_stride_x = std::get<0>(conv_info.stride());
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp
index 171fbad702..efd5747985 100644
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp
@@ -51,7 +51,7 @@ void GCGEMMInterleave4x4Kernel::configure(const IGCTensor *input, IGCTensor *out
output_shape.set(1, std::ceil(input->info()->dimension(1) / 4.0f));
// Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
index d576c30f80..8ead05abfb 100644
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
@@ -97,7 +97,6 @@ inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *i
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != static_cast<size_t>(n));
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != static_cast<size_t>(m));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, output);
}
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp
index 5d9f9c2d3e..dfbd0216b4 100644
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp
@@ -49,7 +49,7 @@ void GCGEMMTranspose1xWKernel::configure(const IGCTensor *input, IGCTensor *outp
output_shape.set(1, static_cast<size_t>(std::ceil((input->info()->dimension(0) / static_cast<float>(transpose_w)))));
// Output tensor auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
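The only shape arithmetic visible in this hunk is unchanged: the transposed output's second dimension is ceil(input_width / transpose_w). A quick stand-alone check of that rounding, not the kernel's actual configure code:

```cpp
#include <cmath>
#include <cstddef>

// ceil(input_width / transpose_w), as used for the 1xW-transposed output shape.
std::size_t transpose1xw_rows(std::size_t input_width, std::size_t transpose_w)
{
    return static_cast<std::size_t>(std::ceil(input_width / static_cast<float>(transpose_w)));
}

int main()
{
    // 100 input columns packed in groups of 16 -> 7 output rows.
    return transpose1xw_rows(100, 16) == 7 ? 0 : 1;
}
```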
diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
index 6c896168ed..21971903eb 100644
--- a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
@@ -53,7 +53,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
@@ -253,7 +252,7 @@ void GCIm2ColKernel::run_generic(const Window &window)
if(_input->info()->data_type() == DataType::F16)
{
(dynamic_cast<TensorInfo *>(_input->info()))->init(_input->info()->tensor_shape(), _input->info()->num_channels(), _input->info()->data_type(), _input->info()->strides_in_bytes(), 0,
- _input->info()->total_size(), _input->info()->fixed_point_position());
+ _input->info()->total_size());
}
_kernel.use();
diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
index 3a0944cd48..f225ebde6b 100644
--- a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
@@ -75,7 +75,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
unsigned int pooled_w = 0;
unsigned int pooled_h = 0;
@@ -118,8 +117,7 @@ std::tuple<Status, Window, GCPoolingConfig> validate_and_configure_window(ITenso
auto_init(input, output, pooled_w, pooled_h);
- BorderSize border_size = BorderSize(pool_pad_y, pool_pad_x);
- const DataType data_type = input->data_type();
+ BorderSize border_size = BorderSize(pool_pad_y, pool_pad_x);
const int input_width = input->dimension(0);
const int input_height = input->dimension(1);
@@ -131,7 +129,7 @@ std::tuple<Status, Window, GCPoolingConfig> validate_and_configure_window(ITenso
{
// Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenGLES kernel where
// each thread computes 4 output elements
- const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3) && !is_data_type_fixed_point(data_type);
+ const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3);
int num_elems_read_per_iteration = pool_size;
@@ -261,8 +259,6 @@ void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output,
_output = output;
_pool_info = pool_info;
- const DataType data_type = input->info()->data_type();
-
// Set build options
std::set<std::string> build_opts;
build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
@@ -293,7 +289,7 @@ void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output,
{
// Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenGLES kernel where
// each thread computes 4 output elements
- const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3) && !is_data_type_fixed_point(data_type);
+ const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3);
std::string kernel_name = "pooling_layer_" + support::cpp11::to_string(pool_size);
if(is_pool3x3_stride_le3)
diff --git a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
index 040a66358f..7ae2fc9fa5 100644
--- a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,7 @@ void GCLogits1DMaxKernel::configure(const IGCTensor *input, IGCTensor *output)
output_shape.set(0, 1);
// Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
@@ -110,8 +110,8 @@ void GCLogits1DShiftExpSumKernel::configure(const IGCTensor *input, const IGCTen
ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output);
// Output auto initialization if not yet initialized
- auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
- auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type());
+ auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, max, sum);
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
@@ -204,10 +204,9 @@ void GCLogits1DNormKernel::configure(const IGCTensor *input, const IGCTensor *su
ARM_COMPUTE_ERROR_ON_NULLPTR(sum, output);
// Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, sum, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
_input = input;
diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
index bda08e4238..7248891abe 100644
--- a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
@@ -49,7 +49,7 @@ void GCTransposeKernel::configure(const IGCTensor *input, IGCTensor *output)
output_shape.set(1, h_out);
// Output tensor auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index ec125154a4..bdc93ed1b8 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -23,7 +23,6 @@
*/
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEAsymm.h"
@@ -46,14 +45,13 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::F16, DataType::F32);
// Checks performed when output is configured
if((output != nullptr) && (output->total_size() != 0))
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
@@ -146,36 +144,6 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
};
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
- // Activation functions : QS8
- static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
- {
- { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint8_t> },
- { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint8_t> },
- { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint8_t> },
- { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint8_t> },
- { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint8_t> },
- { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qint8_t> },
- { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, qint8_t> },
- { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint8_t> },
- { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint8_t> },
- { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint8_t> },
- { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint8_t> },
- };
- // Activation functions : QS16
- static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs16 =
- {
- { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint16_t> },
- { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint16_t> },
- { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint16_t> },
- { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint16_t> },
- { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint16_t> },
- { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qint16_t> },
- { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, qint16_t> },
- { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint16_t> },
- { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint16_t> },
- { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint16_t> },
- { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint16_t> },
- };
// Activation functions : QASYMM8
static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 =
{
@@ -188,12 +156,6 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
case DataType::QASYMM8:
_func = act_map_qasymm8[activation_info.activation()];
break;
- case DataType::QS8:
- _func = act_map_qs8[activation_info.activation()];
- break;
- case DataType::QS16:
- _func = act_map_qs16[activation_info.activation()];
- break;
case DataType::F32:
_func = act_map_f32[activation_info.activation()];
break;
@@ -508,70 +470,6 @@ typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationL
}
template <ActivationLayerInfo::ActivationFunction F, typename T>
-typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
-{
- Iterator input(_input, window);
- Iterator output(_output, window);
- const int fixed_point_position = _input->info()->fixed_point_position();
-
- static const qint8x16_t CONST_0 = vdupq_n_qs8(0);
- const qint8x16_t CONST_1 = vdupq_n_qs8(sqcvt_qs8_f32(1.f, fixed_point_position));
- const qint8x16_t a = vdupq_n_qs8(sqcvt_qs8_f32(_act_info.a(), fixed_point_position));
- const qint8x16_t b = vdupq_n_qs8(sqcvt_qs8_f32(_act_info.b(), fixed_point_position));
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto input_ptr = reinterpret_cast<const int8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
-
- const qint8x16_t in = vld1q_qs8(input_ptr);
- qint8x16_t tmp = {};
-
- switch(F)
- {
- case ActivationFunction::ABS:
- tmp = vqabsq_qs8(in);
- break;
- case ActivationFunction::LINEAR:
- tmp = vqmlaq_qs8(b, a, in, fixed_point_position);
- break;
- case ActivationFunction::LOGISTIC:
- tmp = vqrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position);
- break;
- case ActivationFunction::RELU:
- tmp = vmaxq_qs8(CONST_0, in);
- break;
- case ActivationFunction::BOUNDED_RELU:
- tmp = vminq_qs8(a, vmaxq_qs8(CONST_0, in));
- break;
- case ActivationFunction::LU_BOUNDED_RELU:
- tmp = vminq_qs8(a, vmaxq_qs8(b, in));
- break;
- case ActivationFunction::LEAKY_RELU:
- tmp = vbslq_s8(vcgtq_s8(in, CONST_0), in, vmulq_qs8(a, in, fixed_point_position));
- break;
- case ActivationFunction::SOFT_RELU:
- tmp = vlogq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(in, fixed_point_position)), fixed_point_position);
- break;
- case ActivationFunction::SQRT:
- tmp = vqrecipq_qs8(vqinvsqrtq_qs8(in, fixed_point_position), fixed_point_position);
- break;
- case ActivationFunction::SQUARE:
- tmp = vqmulq_qs8(in, in, fixed_point_position);
- break;
- case ActivationFunction::TANH:
- tmp = vqmulq_qs8(a, vqtanhq_qs8(vqmulq_qs8(b, in, fixed_point_position), fixed_point_position), fixed_point_position);
- break;
- default:
- break;
- }
-
- vst1q_qs8(output_ptr, tmp);
- },
- input, output);
-}
-
-template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
Iterator input(_input, window);
@@ -620,137 +518,6 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
input, output);
}
-template <ActivationLayerInfo::ActivationFunction F, typename T>
-typename std::enable_if<std::is_same<T, qint16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
-{
- Iterator input(_input, window);
- Iterator output(_output, window);
- const int fixed_point_position = _input->info()->fixed_point_position();
-
- static const qint16x8_t CONST_0 = vdupq_n_qs16(0);
- const qint16x8_t CONST_1 = vdupq_n_qs16(sqcvt_qs16_f32(1.f, fixed_point_position));
- const qint16x8_t a = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.a(), fixed_point_position));
- const qint16x8_t b = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.b(), fixed_point_position));
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto input_ptr = reinterpret_cast<const int16_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());
-
- const qint16x8x2_t in = vld2q_s16(input_ptr);
- qint16x8x2_t tmp = { {} };
-
- switch(F)
- {
- case ActivationFunction::ABS:
- tmp =
- {
- {
- vqabsq_qs16(in.val[0]),
- vqabsq_qs16(in.val[1]),
- }
- };
- break;
- case ActivationFunction::LINEAR:
- tmp =
- {
- {
- vqmlaq_qs16(b, a, in.val[0], fixed_point_position),
- vqmlaq_qs16(b, a, in.val[1], fixed_point_position),
- }
- };
- break;
- case ActivationFunction::LOGISTIC:
- tmp =
- {
- {
- vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[0]), fixed_point_position)), fixed_point_position),
- vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[1]), fixed_point_position)), fixed_point_position),
- }
- };
- break;
- case ActivationFunction::RELU:
- tmp =
- {
- {
- vmaxq_qs16(CONST_0, in.val[0]),
- vmaxq_qs16(CONST_0, in.val[1]),
- }
- };
- break;
- case ActivationFunction::BOUNDED_RELU:
- tmp =
- {
- {
- vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[0])),
- vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[1])),
- }
- };
- break;
- case ActivationFunction::LU_BOUNDED_RELU:
- tmp =
- {
- {
- vminq_qs16(a, vmaxq_qs16(b, in.val[0])),
- vminq_qs16(a, vmaxq_qs16(b, in.val[1])),
- }
- };
- break;
- case ActivationFunction::LEAKY_RELU:
- tmp =
- {
- {
- vbslq_s16(vcgtq_s16(in.val[0], CONST_0), in.val[0], vmulq_qs16(a, in.val[0], fixed_point_position)),
- vbslq_s16(vcgtq_s16(in.val[1], CONST_0), in.val[1], vmulq_qs16(a, in.val[1], fixed_point_position)),
- }
- };
- break;
- case ActivationFunction::SOFT_RELU:
- tmp =
- {
- {
- vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[0], fixed_point_position)), fixed_point_position),
- vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[1], fixed_point_position)), fixed_point_position),
- }
- };
- break;
- case ActivationFunction::SQRT:
- tmp =
- {
- {
- vqrecipq_qs16(vqinvsqrtq_qs16(in.val[0], fixed_point_position), fixed_point_position),
- vqrecipq_qs16(vqinvsqrtq_qs16(in.val[1], fixed_point_position), fixed_point_position),
- }
- };
- break;
- case ActivationFunction::SQUARE:
- tmp =
- {
- {
- vqmulq_qs16(in.val[0], in.val[0], fixed_point_position),
- vqmulq_qs16(in.val[1], in.val[1], fixed_point_position),
- }
- };
- break;
- case ActivationFunction::TANH:
- tmp =
- {
- {
- vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[0], fixed_point_position), fixed_point_position), fixed_point_position),
- vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[1], fixed_point_position), fixed_point_position), fixed_point_position),
- }
- };
- break;
- default:
- ARM_COMPUTE_ERROR("Function not implemented");
- break;
- }
-
- vst2q_qs16(output_ptr, tmp);
- },
- input, output);
-}
-
Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(act_info);
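The deleted QS8/QS16 activation specializations above evaluated every activation in Qm.n fixed point: constants such as 1.0, a and b were first converted with sqcvt_qs8_f32 / sqcvt_qs16_f32 and products were rescaled by fixed_point_position. A minimal scalar sketch of that convention (raw value = real value * 2^fixed_point_position, saturated to the lane width, rounding details omitted) is below; the helper names are illustrative, not the library's API.

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Float -> QS8: scale by 2^n, round, then saturate to the int8 range
    // (scalar counterpart of what sqcvt_qs8_f32 provided).
    int8_t to_qs8(float value, int fixed_point_position)
    {
        const float scaled = std::round(value * static_cast<float>(1 << fixed_point_position));
        return static_cast<int8_t>(std::max(-128.f, std::min(127.f, scaled)));
    }

    // QS8 * QS8: widen, multiply, shift the result back into Qm.n, saturate.
    int8_t mul_qs8(int8_t a, int8_t b, int fixed_point_position)
    {
        const int32_t product = (static_cast<int32_t>(a) * static_cast<int32_t>(b)) >> fixed_point_position;
        return static_cast<int8_t>(std::max(-128, std::min(127, product)));
    }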
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index a487090a98..f8e2b6d73e 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -48,38 +48,6 @@ namespace
{
constexpr unsigned int num_elems_processed_per_iteration = 16;
-void add_wrap_QS8_QS8_QS8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
-{
- Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape()));
- Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape()));
- Iterator output(out, window);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const qint8x16_t a = vld1q_qs8(reinterpret_cast<const qint8_t *>(input1.ptr()));
- const qint8x16_t b = vld1q_qs8(reinterpret_cast<const qint8_t *>(input2.ptr()));
-
- vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vaddq_qs8(a, b));
- },
- input1, input2, output);
-}
-
-void add_saturate_QS8_QS8_QS8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
-{
- Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape()));
- Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape()));
- Iterator output(out, window);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const qint8x16_t a = vld1q_qs8(reinterpret_cast<const qint8_t *>(input1.ptr()));
- const qint8x16_t b = vld1q_qs8(reinterpret_cast<const qint8_t *>(input2.ptr()));
-
- vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqaddq_qs8(a, b));
- },
- input1, input2, output);
-}
-
void add_wrap_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
{
Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape()));
@@ -362,28 +330,21 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
{
ARM_COMPUTE_UNUSED(policy);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
- if(is_data_type_fixed_point(input1.data_type()) || is_data_type_fixed_point(input2.data_type()))
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(&input1, &input2);
- }
-
// Validate in case of configured output
if(output.total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- !(input1.data_type() == DataType::QS8 && input2.data_type() == DataType::QS8 && output.data_type() == DataType::QS8)
- && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::U8)
+ !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::U8)
&& !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
&& !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
&& !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
- && !(input1.data_type() == DataType::QS16 && input2.data_type() == DataType::QS16 && output.data_type() == DataType::QS16)
&& !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
&& !(input1.data_type() == DataType::F32 && input2.data_type() == DataType::F32 && output.data_type() == DataType::F32)
&& !(input1.data_type() == DataType::F16 && input2.data_type() == DataType::F16 && output.data_type() == DataType::F16),
@@ -391,11 +352,6 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
"Wrong shape for output");
-
- if(is_data_type_fixed_point(input1.data_type()) || is_data_type_fixed_point(output.data_type()))
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(&input1, &output);
- }
}
return Status{};
@@ -460,8 +416,6 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor
static std::map<std::string, AddFunction *> map_function =
{
- { "add_wrap_QS8_QS8_QS8", &add_wrap_QS8_QS8_QS8 },
- { "add_saturate_QS8_QS8_QS8", &add_saturate_QS8_QS8_QS8 },
{ "add_wrap_U8_U8_U8", &add_wrap_U8_U8_U8 },
{ "add_saturate_U8_U8_U8", &add_saturate_U8_U8_U8 },
{ "add_wrap_S16_U8_S16", &add_wrap_S16_U8_S16 },
@@ -470,8 +424,6 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor
{ "add_saturate_U8_S16_S16", &add_saturate_U8_S16_S16 },
{ "add_wrap_U8_U8_S16", &add_wrap_U8_U8_S16 },
{ "add_saturate_U8_U8_S16", &add_saturate_U8_U8_S16 },
- { "add_wrap_QS16_QS16_QS16", &add_wrap_S16_S16_S16 },
- { "add_saturate_QS16_QS16_QS16", &add_saturate_S16_S16_S16 },
{ "add_wrap_S16_S16_S16", &add_wrap_S16_S16_S16 },
{ "add_saturate_S16_S16_S16", &add_saturate_S16_S16_S16 },
{ "add_wrap_F32_F32_F32", &add_F32_F32_F32 },
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index 3db80285c0..5a162e3b2c 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,38 +45,6 @@ class Coordinates;
namespace
{
-void sub_wrap_QS8_QS8_QS8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
-{
- Iterator input1(in1, window);
- Iterator input2(in2, window);
- Iterator output(out, window);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const qint8x16_t a = vld1q_qs8(reinterpret_cast<const qint8_t *>(input1.ptr()));
- const qint8x16_t b = vld1q_qs8(reinterpret_cast<const qint8_t *>(input2.ptr()));
-
- vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vsubq_qs8(a, b));
- },
- input1, input2, output);
-}
-
-void sub_saturate_QS8_QS8_QS8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
-{
- Iterator input1(in1, window);
- Iterator input2(in2, window);
- Iterator output(out, window);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const qint8x16_t a = vld1q_qs8(reinterpret_cast<const qint8_t *>(input1.ptr()));
- const qint8x16_t b = vld1q_qs8(reinterpret_cast<const qint8_t *>(input2.ptr()));
-
- vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqsubq_qs8(a, b));
- },
- input1, input2, output);
-}
-
void sub_wrap_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
{
Iterator input1(in1, window);
@@ -353,23 +321,15 @@ inline Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *i
{
ARM_COMPUTE_UNUSED(policy);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
-
- if(is_data_type_fixed_point(input1->data_type()) || is_data_type_fixed_point(input2->data_type()) || is_data_type_fixed_point(output->data_type()))
- {
- // Check that all data types are the same and all fixed-point positions are the same
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input1, input2, output);
- }
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- !(input1->data_type() == DataType::QS8 && input2->data_type() == DataType::QS8 && output->data_type() == DataType::QS8)
- && !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::U8 && output->data_type() == DataType::U8)
+ !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::U8 && output->data_type() == DataType::U8)
&& !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::U8 && output->data_type() == DataType::S16)
&& !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::S16 && output->data_type() == DataType::S16)
&& !(input1->data_type() == DataType::S16 && input2->data_type() == DataType::U8 && output->data_type() == DataType::S16)
- && !(input1->data_type() == DataType::QS16 && input2->data_type() == DataType::QS16 && output->data_type() == DataType::QS16)
&& !(input1->data_type() == DataType::S16 && input2->data_type() == DataType::S16 && output->data_type() == DataType::S16)
&& !(input1->data_type() == DataType::F32 && input2->data_type() == DataType::F32 && output->data_type() == DataType::F32)
&& !(input1->data_type() == DataType::F16 && input2->data_type() == DataType::F16 && output->data_type() == DataType::F16),
@@ -432,8 +392,6 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
static std::map<std::string, NEArithmeticSubtractionKernel::SubFunction *> map_function =
{
- { "sub_wrap_QS8_QS8_QS8", &sub_wrap_QS8_QS8_QS8 },
- { "sub_saturate_QS8_QS8_QS8", &sub_saturate_QS8_QS8_QS8 },
{ "sub_wrap_U8_U8_U8", &sub_wrap_U8_U8_U8 },
{ "sub_wrap_U8_U8_S16", &sub_wrap_U8_U8_S16 },
{ "sub_saturate_U8_U8_U8", &sub_saturate_U8_U8_U8 },
@@ -442,8 +400,6 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
{ "sub_wrap_S16_U8_S16", &sub_wrap_S16_U8_S16 },
{ "sub_saturate_U8_S16_S16", &sub_saturate_U8_S16_S16 },
{ "sub_saturate_S16_U8_S16", &sub_saturate_S16_U8_S16 },
- { "sub_wrap_QS16_QS16_QS16", &sub_wrap_S16_S16_S16 },
- { "sub_saturate_QS16_QS16_QS16", &sub_saturate_S16_S16_S16 },
{ "sub_wrap_S16_S16_S16", &sub_wrap_S16_S16_S16 },
{ "sub_saturate_S16_S16_S16", &sub_saturate_S16_S16_S16 },
{ "sub_wrap_F32_F32_F32", &sub_F32_F32_F32 },
diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
index 6be50fdb0d..6aed41f3aa 100644
--- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
@@ -43,7 +43,7 @@ validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const IT
const ITensorInfo *beta, const ITensorInfo *gamma, float epsilon, ActivationLayerInfo act_info)
{
ARM_COMPUTE_UNUSED(epsilon);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16,
DataType::F32);
if(act_info.enabled())
@@ -60,22 +60,18 @@ validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const IT
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, var);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, mean, var);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, var);
if(beta != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, beta);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, beta);
}
if(gamma != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, gamma);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, gamma);
}
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)) != mean->dimension(0));
@@ -104,112 +100,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} //namespace
template <bool fused_activation>
-void NEBatchNormalizationLayerKernel::batch_normalization_qs8(const Window &window)
-{
- static_assert(!fused_activation, "Activation is not supported for QS8");
-
- Iterator input(_input, window);
- Iterator output(_output, window);
-
- // Hold information about the current feature map we are iterating.
- // Only compute denominator and NEON vectors once per feature map.
- int slice = -1;
-
- const int fixed_point_position = _input->info()->fixed_point_position();
- const auto input_mean = reinterpret_cast<const qint8_t *>(_mean->ptr_to_element(Coordinates(0, 0)));
- const auto input_var = reinterpret_cast<const qint8_t *>(_var->ptr_to_element(Coordinates(0, 0)));
- const auto input_gamma = (_gamma != nullptr) ? reinterpret_cast<const qint8_t *>(_gamma->ptr_to_element(Coordinates(0, 0))) : nullptr;
- const auto input_beta = (_beta != nullptr) ? reinterpret_cast<const qint8_t *>(_beta->ptr_to_element(Coordinates(0, 0))) : nullptr;
-
- qint8x16_t mean_vec = vdupq_n_qs8(0);
- qint8x16_t var_vec = vdupq_n_qs8(0);
- qint8x16_t gamma_vec = vdupq_n_qs8(sqcvt_qs8_f32(1, fixed_point_position));
- qint8x16_t beta_vec = vdupq_n_qs8(sqcvt_qs8_f32(0, fixed_point_position));
- qint8x16_t denominator = vdupq_n_qs8(0);
- const qint8x16_t epsilon_vec = vdupq_n_qs8(sqcvt_qs8_f32(_epsilon, fixed_point_position));
- execute_window_loop(window, [&](const Coordinates & id)
- {
- if(slice != id.z())
- {
- // Construct vectors
- mean_vec = vdupq_n_qs8(*(input_mean + id.z()));
- var_vec = vdupq_n_qs8(*(input_var + id.z()));
- if(input_gamma != nullptr)
- {
- gamma_vec = vdupq_n_qs8(*(input_gamma + id.z()));
- }
- if(input_beta != nullptr)
- {
- beta_vec = vdupq_n_qs8(*(input_beta + id.z()));
- }
-
- // Calculate denominator
- denominator = vqinvsqrtq_qs8(vqaddq_qs8(var_vec, epsilon_vec), fixed_point_position);
- slice = id.z();
- }
-
- // Calculate x bar and store results
- const qint8x16_t numerator = vqsubq_qs8(vld1q_qs8(reinterpret_cast<const qint8_t *>(input.ptr())), mean_vec);
- const qint8x16_t x_bar = vqmulq_qs8(numerator, denominator, fixed_point_position);
- vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqmlaq_qs8(beta_vec, x_bar, gamma_vec, fixed_point_position));
- },
- input, output);
-}
-
-template <bool fused_activation>
-void NEBatchNormalizationLayerKernel::batch_normalization_qs16(const Window &window)
-{
- static_assert(!fused_activation, "Activation is not supported for QS16");
-
- Iterator input(_input, window);
- Iterator output(_output, window);
-
- // Hold information about the current feature map we are iterating.
- // Only compute denominator and NEON vectors once per feature map.
- int slice = -1;
-
- const int fixed_point_position = _input->info()->fixed_point_position();
- const auto input_mean = reinterpret_cast<const qint16_t *>(_mean->ptr_to_element(Coordinates(0, 0)));
- const auto input_var = reinterpret_cast<const qint16_t *>(_var->ptr_to_element(Coordinates(0, 0)));
- const auto input_gamma = (_gamma != nullptr) ? reinterpret_cast<const qint16_t *>(_gamma->ptr_to_element(Coordinates(0, 0))) : nullptr;
- const auto input_beta = (_beta != nullptr) ? reinterpret_cast<const qint16_t *>(_beta->ptr_to_element(Coordinates(0, 0))) : nullptr;
-
- qint16x8_t mean_vec = vdupq_n_qs16(0);
- qint16x8_t var_vec = vdupq_n_qs16(0);
- qint16x8_t gamma_vec = vdupq_n_qs16(sqcvt_qs16_f32(1, fixed_point_position));
- qint16x8_t beta_vec = vdupq_n_qs16(sqcvt_qs16_f32(0, fixed_point_position));
- qint16x8_t denominator = vdupq_n_qs16(0);
- const qint16x8_t epsilon_vec = vdupq_n_qs16(sqcvt_qs16_f32(_epsilon, fixed_point_position));
- execute_window_loop(window, [&](const Coordinates & id)
- {
- if(slice != id.z())
- {
- // Construct vectors
- mean_vec = vdupq_n_qs16(*(input_mean + id.z()));
- var_vec = vdupq_n_qs16(*(input_var + id.z()));
- if(input_gamma != nullptr)
- {
- gamma_vec = vdupq_n_qs16(*(input_gamma + id.z()));
- }
- if(input_beta != nullptr)
- {
- beta_vec = vdupq_n_qs16(*(input_beta + id.z()));
- }
-
- // Calculate denominator
- denominator = vqinvsqrtq_qs16(vqaddq_qs16(var_vec, epsilon_vec), fixed_point_position);
- slice = id.z();
- }
-
- // Calculate x bar and store results
- const qint16x8_t numerator = vqsubq_qs16(vld1q_qs16(reinterpret_cast<const qint16_t *>(input.ptr())), mean_vec);
- const qint16x8_t x_bar = vqmulq_qs16(numerator, denominator, fixed_point_position);
- vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqmlaq_qs16(beta_vec, x_bar, gamma_vec, fixed_point_position));
- },
- input, output);
-}
-
-template <bool fused_activation>
void NEBatchNormalizationLayerKernel::batch_normalization_fp16_nchw(const Window &window)
{
static_assert(!fused_activation, "Activation is not supported for FP16");
@@ -406,12 +296,6 @@ void NEBatchNormalizationLayerKernel::configure_non_fused()
const bool is_nhwc = _input->info()->data_layout() == DataLayout::NHWC;
switch(_input->info()->data_type())
{
- case DataType::QS8:
- _func = &NEBatchNormalizationLayerKernel::batch_normalization_qs8<false>;
- break;
- case DataType::QS16:
- _func = &NEBatchNormalizationLayerKernel::batch_normalization_qs16<false>;
- break;
case DataType::F16:
_func = (is_nhwc) ? &NEBatchNormalizationLayerKernel::batch_normalization_fp16_nhwc<false> : &NEBatchNormalizationLayerKernel::batch_normalization_fp16_nchw<false>;
break;
diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp
index 9fda65feb4..d09d174e4f 100644
--- a/src/core/NEON/kernels/NECol2ImKernel.cpp
+++ b/src/core/NEON/kernels/NECol2ImKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,8 +50,8 @@ TensorShape get_output_shape(const ITensorInfo *input, const Size2D &convolved_d
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8,
- DataType::U16, DataType::S16, DataType::QS16,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8,
+ DataType::U16, DataType::S16,
DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
@@ -60,7 +60,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), get_output_shape(input, convolved_dims));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
diff --git a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp
index b3746bddf2..e581f221a3 100644
--- a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp
@@ -65,7 +65,7 @@ void NEConvertFullyConnectedWeightsKernel::configure(const ITensor *input, ITens
Status NEConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape,
DataLayout data_layout)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::QS16, DataType::U32, DataType::S32,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::U32, DataType::S32,
DataType::QS32, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
index 891a03c5cc..38443ca4a8 100644
--- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,10 +41,6 @@ using namespace arm_compute;
namespace
{
// Overloads of 128-bit vector loads
-uint8x16_t loadq(const uint8_t *ptr)
-{
- return vld1q_u8(ptr);
-}
uint16x8_t loadq(const uint16_t *ptr)
{
return vld1q_u16(ptr);
@@ -54,10 +50,6 @@ uint32x4_t loadq(const uint32_t *ptr)
return vld1q_u32(ptr);
}
// Overloads of 128-bit vector stores
-void storeq(uint8_t *ptr, uint8x16_t val)
-{
- return vst1q_u8(ptr, val);
-}
void storeq(uint16_t *ptr, uint16x8_t val)
{
return vst1q_u16(ptr, val);
@@ -107,9 +99,8 @@ BorderSize NEDepthConcatenateLayerKernel::border_size() const
void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int depth_offset, ITensor *output)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2));
ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0));
ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1));
@@ -129,10 +120,6 @@ void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int
switch(input->info()->data_type())
{
- case DataType::QS8:
- _func = &depth_concat<uint8_t>;
- break;
- case DataType::QS16:
case DataType::F16:
_func = &depth_concat<uint16_t>;
break;
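The QS8 and QS16 cases could be dropped here without adding new instantiations because depth_concat is a plain copy templated on an unsigned integer of the element width, which is why QS16 previously shared the uint16_t path with F16. A rough scalar view of the copy each row performs, under that reading of the kernel:

    #include <cstddef>
    #include <cstring>

    // One output row of the depth concatenation: a byte-for-byte copy of the
    // input row into the output tensor at the destination depth offset.
    template <typename T>
    void concat_row(const T *src, T *dst, std::size_t num_elements)
    {
        std::memcpy(dst, src, num_elements * sizeof(T)); // bits are never interpreted
    }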
diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
index c29cb57513..8280b52fcb 100644
--- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,13 +40,13 @@ class Coordinates;
} // namespace arm_compute
NEDepthConvertLayerKernel::NEDepthConvertLayerKernel()
- : _input(nullptr), _output(nullptr), _policy(), _shift(0), _fixed_point_position_input(0), _fixed_point_position_output(0)
+ : _input(nullptr), _output(nullptr), _policy(), _shift(0)
{
}
void NEDepthConvertLayerKernel::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::U16);
_input = input;
_output = input;
@@ -58,48 +58,26 @@ void NEDepthConvertLayerKernel::configure(ITensor *input, ITensor *output, Conve
// Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given)
set_shape_if_empty(*output->info(), input->info()->tensor_shape());
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::U32, DataType::S32, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
// Set output
_output = output;
}
- // Set initial fixed point position of input and output
- _fixed_point_position_input = input->info()->fixed_point_position();
- _fixed_point_position_output = _output->info()->fixed_point_position();
-
- // Set the fixed point position to the output tensor if needed
- if(is_data_type_fixed_point(input->info()->data_type()) && is_data_type_fixed_point(_output->info()->data_type()))
- {
- // If in-place set the fixed point position of the output tensor to be equal to shift
- _fixed_point_position_output = (_input == _output) ? static_cast<int>(_shift) : _fixed_point_position_output;
- // Set fixed point position to output tensor
- _output->info()->set_fixed_point_position(_fixed_point_position_output);
- }
-
- ARM_COMPUTE_ERROR_ON(shift >= 8 && (!is_data_type_fixed_point(input->info()->data_type()) && !is_data_type_fixed_point(output->info()->data_type())));
+ ARM_COMPUTE_ERROR_ON(shift >= 8);
ARM_COMPUTE_ERROR_ON(input == output && (data_size_from_type(input->info()->data_type()) != data_size_from_type(output->info()->data_type())));
ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16
&& output->info()->data_type() != DataType::S32),
"Only data_types supported [in] U8 -> [out] U16, S16, S32");
- ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::F32),
- "Only data_types supported [in] QS8 -> [out] QS8, F32");
-
ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32),
"Only data_types supported [in] U16 -> [out] U8, U32");
ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::S32),
"Only data_types supported [in] S16 -> [out] U8, S32");
- ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::QS16 && output->info()->data_type() != DataType::F32),
- "Only data_types supported [in] QS16 -> [out] QS16, F32");
-
- ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::QS16),
- "Only data_types supported [in] F32 -> [out] QS8, QS16");
-
constexpr unsigned int num_elems_processed_per_iteration = 16;
// Configure kernel window
@@ -132,8 +110,6 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
Iterator input(_input, window);
Iterator output(_output, window);
- bool in_place = (_input == _output);
-
switch(_input->info()->data_type())
{
case DataType::U8:
@@ -212,49 +188,6 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
}
break;
}
- case DataType::QS8:
- {
- switch(_output->info()->data_type())
- {
- case DataType::QS8:
- {
- const int relative_shift = _fixed_point_position_output - _fixed_point_position_input;
- /* Fixed point position conversion QS8 -> QS8 */
- if(relative_shift != 0 || !in_place)
- {
- const auto relative_shift_vec = vdupq_n_qs8(relative_shift);
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast<const qint8_t *>(input.ptr()));
- vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqrshlq_s8(texels_qs8, relative_shift_vec));
- },
- input, output);
- }
- break;
- }
- case DataType::F32:
- {
- /* Up-conversion QS8 -> F32 */
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast<const qint8_t *>(input.ptr()));
-
- float32x4x2_t texels_low = vcvt_f32_qs8(vget_low_s8(texels_qs8), _fixed_point_position_input);
- float32x4x2_t texels_high = vcvt_f32_qs8(vget_high_s8(texels_qs8), _fixed_point_position_input);
-
- vst1q_f32(reinterpret_cast<float *>(output.ptr()), texels_low.val[0]);
- vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 4, texels_low.val[1]);
- vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 8, texels_high.val[0]);
- vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 12, texels_high.val[1]);
- },
- input, output);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Output data type not supported");
- }
- break;
- }
case DataType::S16:
{
switch(_output->info()->data_type())
@@ -408,116 +341,6 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
}
break;
}
- case DataType::QS16:
- {
- switch(_output->info()->data_type())
- {
- case DataType::QS16:
- {
- const int relative_shift = _fixed_point_position_output - _fixed_point_position_input;
- /* Fixed point position conversion QS16 -> QS16 */
- if(relative_shift != 0 || !in_place)
- {
- const auto relative_shift_vec = vdupq_n_qs16(relative_shift);
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const qint16x8x2_t texels_qs16 =
- {
- {
- vld1q_qs16(reinterpret_cast<qint16_t *>(input.ptr())),
- vld1q_qs16(reinterpret_cast<qint16_t *>(input.ptr()) + 8)
- }
- };
- vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqrshlq_s16(texels_qs16.val[0], relative_shift_vec));
- vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()) + 8, vqrshlq_s16(texels_qs16.val[1], relative_shift_vec));
- },
- input, output);
- }
- break;
- }
- case DataType::F32:
- {
- /* Up-conversion QS16 -> F32 */
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const int16x8x2_t texels_qs16 =
- {
- {
- vld1q_s16(reinterpret_cast<qint16_t *>(input.ptr())),
- vld1q_s16(reinterpret_cast<qint16_t *>(input.ptr()) + 8)
- }
- };
-
- vst1q_f32(reinterpret_cast<float *>(output.ptr()), vcvt_f32_qs16(vget_low_s16(texels_qs16.val[0]), _fixed_point_position_input));
- vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 4, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[0]), _fixed_point_position_input));
- vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 8, vcvt_f32_qs16(vget_low_s16(texels_qs16.val[1]), _fixed_point_position_input));
- vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 12, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[1]), _fixed_point_position_input));
- },
- input, output);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Output data type not supported");
- }
- break;
- }
- case DataType::F32:
- {
- switch(_output->info()->data_type())
- {
- case DataType::QS8:
- {
- /* Down-conversion F32 -> QS8 */
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const float32x4x4_t texels_f32 =
- {
- {
- vld1q_f32(reinterpret_cast<const float *>(input.ptr())),
- vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 4),
- vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 8),
- vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 12)
- }
- };
-
- const qint8x16_t texels_s8 = vqcvtq_qs8_f32(texels_f32, _fixed_point_position_output);
-
- vst1q_s8(reinterpret_cast<int8_t *>(output.ptr()), texels_s8);
- },
- input, output);
- break;
- }
- case DataType::QS16:
- {
- /* Down-conversion F32 -> QS16 */
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const float32x4x2_t texels_f32_1 =
- {
- {
- vld1q_f32(reinterpret_cast<const float *>(input.ptr())),
- vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 4),
- }
- };
- const float32x4x2_t texels_f32_2 =
- {
- {
- vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 8),
- vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 12)
- }
- };
-
- vst1q_s16(reinterpret_cast<qint16_t *>(output.ptr()), vqcvtq_qs16_f32(texels_f32_1, _fixed_point_position_output));
- vst1q_s16(reinterpret_cast<qint16_t *>(output.ptr()) + 8, vqcvtq_qs16_f32(texels_f32_2, _fixed_point_position_output));
- },
- input, output);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Output data type not supported");
- }
- break;
- }
default:
ARM_COMPUTE_ERROR("Not supported");
}
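The removed QS8/QS16 branches of the depth convert kernel covered two conversions no other type needs: changing the fixed point position in place (a saturating rounding shift by the difference of the two positions, via vqrshlq) and moving to or from F32 by scaling with 2^fixed_point_position. A scalar sketch of both, assuming the raw/2^n convention and ignoring the rounding the NEON shift applies:

    #include <algorithm>
    #include <cstdint>

    // QS8 -> F32 up-conversion: undo the 2^fixed_point_position scaling.
    float qs8_to_f32(int8_t raw, int fixed_point_position)
    {
        return static_cast<float>(raw) / static_cast<float>(1 << fixed_point_position);
    }

    // QS8 -> QS8 with a new fixed point position: shift by the difference and
    // clamp to the int8 range instead of wrapping.
    int8_t qs8_change_position(int8_t raw, int old_position, int new_position)
    {
        const int     shift   = new_position - old_position;
        const int32_t shifted = (shift >= 0) ? (static_cast<int32_t>(raw) << shift)
                                             : (static_cast<int32_t>(raw) >> -shift);
        return static_cast<int8_t>(std::max(-128, std::min(127, shifted)));
    }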
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
index 8cdf175d8a..09728e2a8d 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -115,7 +115,7 @@ public:
in_top += delta_input, in_mid += delta_input, in_low += delta_input,
p_out += num_elems_written_per_iteration)
{
- auto vres = convolve_3x3<stridex>(in_top, in_mid, in_low, vw_r0, vw_r1, vw_r2, 0, input_offset);
+ auto vres = convolve_3x3<stridex>(in_top, in_mid, in_low, vw_r0, vw_r1, vw_r2, input_offset);
store_results<stridex>(p_out, vres);
}
}
diff --git a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp
index cfd8eacfdd..5b43e2b14f 100644
--- a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp
@@ -122,7 +122,6 @@ void NEDepthwiseIm2ColKernel::configure(const ITensor *input, ITensor *output, c
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
ARM_COMPUTE_ERROR_ON(is_data_type_quantized_asymmetric(input->info()->data_type()) && has_bias);
ARM_COMPUTE_ERROR_ON((input->info()->dimension(2) * depth_multiplier) != output->info()->dimension(2));
ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != (kernel_dims.width * kernel_dims.height + ((has_bias) ? 1 : 0)));
diff --git a/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp b/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
index 8960d8a8af..86a6d1c1a8 100644
--- a/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
@@ -89,7 +89,6 @@ void NEDepthwiseVectorToTensorKernel::configure(const ITensor *input, ITensor *o
ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
_input = input;
_output = output;
diff --git a/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp
index 36b17bfc4c..47fcf12874 100644
--- a/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp
@@ -88,7 +88,6 @@ void NEDepthwiseWeightsReshapeKernel::configure(const ITensor *input, ITensor *o
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
ARM_COMPUTE_ERROR_ON(is_data_type_quantized_asymmetric(input->info()->data_type()) && (biases != nullptr));
ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != output->info()->dimension(1));
ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != (input->info()->dimension(0) * input->info()->dimension(1) + ((biases != nullptr) ? 1 : 0)));
@@ -96,7 +95,6 @@ void NEDepthwiseWeightsReshapeKernel::configure(const ITensor *input, ITensor *o
if(biases != nullptr)
{
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != input->info()->dimension(2));
ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
}
diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
index 4120e5f87a..47c895c594 100644
--- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
@@ -54,7 +54,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *min_max)
{
// Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32, 0);
+ auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32);
constexpr unsigned int num_elems_processed_per_iteration = 8;
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index 5eafdf0363..54a046846a 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -43,34 +43,6 @@ using namespace arm_compute::detail;
namespace
{
-template <unsigned int stridex>
-qint16x8_t internal_vld1q(const qint16_t *in);
-
-template <>
-qint16x8_t internal_vld1q<1>(const qint16_t *in)
-{
- return vld1q_qs16(in);
-}
-
-template <>
-qint16x8_t internal_vld1q<2>(const qint16_t *in)
-{
- const int16x8x2_t tmp = vld2q_s16(in);
- return tmp.val[0];
-}
-
-template <>
-qint16x8_t internal_vld1q<3>(const qint16_t *in)
-{
- const int16x8x3_t tmp = vld3q_s16(in);
- return tmp.val[0];
-}
-
-inline qint16x8_t internal_vdupq_n(qint16_t v)
-{
- return vdupq_n_qs16(v);
-}
-
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
template <unsigned int stridex>
float16x8_t internal_vld1q(const float16_t *in);
@@ -105,15 +77,13 @@ inline void internal_vst1q(float16_t *p, const float16x8_t &v)
vst1q_f16(p, v);
}
-float16x8_t internal_vmull(const float16x8_t &x, const float16x8_t &y, int fixed_point_position)
+float16x8_t internal_vmull(const float16x8_t &x, const float16x8_t &y)
{
- ARM_COMPUTE_UNUSED(fixed_point_position);
return vmulq_f16(x, y);
}
-inline float16x8_t internal_vmlal(const float16x8_t &x, const float16x8_t &y, const float16x8_t &z, int fixed_point_position)
+inline float16x8_t internal_vmlal(const float16x8_t &x, const float16x8_t &y, const float16x8_t &z)
{
- ARM_COMPUTE_UNUSED(fixed_point_position);
return vaddq_f16(x, vmulq_f16(y, z));
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
@@ -151,107 +121,16 @@ inline void internal_vst1q(float *p, const float32x4_t &v)
vst1q_f32(p, v);
}
-float32x4_t internal_vmull(const float32x4_t &x, const float32x4_t &y, int fixed_point_position)
+float32x4_t internal_vmull(const float32x4_t &x, const float32x4_t &y)
{
- ARM_COMPUTE_UNUSED(fixed_point_position);
return vmulq_f32(x, y);
}
-inline float32x4_t internal_vmlal(const float32x4_t &x, const float32x4_t &y, const float32x4_t &z, int fixed_point_position)
+inline float32x4_t internal_vmlal(const float32x4_t &x, const float32x4_t &y, const float32x4_t &z)
{
- ARM_COMPUTE_UNUSED(fixed_point_position);
return vmlaq_f32(x, y, z);
}
-template <unsigned int stridex>
-qint8x8_t internal_vld1q(const qint8_t *in);
-
-template <>
-qint8x8_t internal_vld1q<1>(const qint8_t *in)
-{
- return vld1_qs8(in);
-}
-
-template <>
-qint8x8_t internal_vld1q<2>(const qint8_t *in)
-{
- const qint8x8x2_t tmp = vld2_s8(in);
- return tmp.val[0];
-}
-
-template <>
-qint8x8_t internal_vld1q<3>(const qint8_t *in)
-{
- const qint8x8x3_t tmp = vld3_s8(in);
- return tmp.val[0];
-}
-
-inline qint8x8_t internal_vdupq_n(qint8_t v)
-{
- return vdup_n_qs8(v);
-}
-
-inline qint16x8_t internal_vmull(const qint8x8_t &x, const qint8x8_t &y, int fixed_point_position)
-{
- return vmull_qs8(x, y, fixed_point_position);
-}
-
-inline qint16x8_t internal_vmlal(const qint16x8_t &x, const qint8x8_t &y, const qint8x8_t &z, int fixed_point_position)
-{
- return vqmlal_qs8(x, y, z, fixed_point_position);
-}
-
-inline void internal_vst1q(qint16_t *p, const qint16x8_t &v)
-{
- vst1q_qs16(p, v);
-}
-
-inline void internal_vst1q(int32_t *p, const qint32x4x2_t &v)
-{
- vst1q_s32(p, v.val[0]);
- vst1q_s32(p + 4, v.val[1]);
-}
-
-template <unsigned int stridex>
-qint32x4x2_t internal_vld1q(const qint32_t *in);
-
-template <>
-qint32x4x2_t internal_vld1q<1>(const qint32_t *in)
-{
- const qint32x4x2_t r =
- {
- {
- vld1q_s32(in),
- vld1q_s32(in + 4)
- }
- };
- return r;
-}
-
-inline qint32x4x2_t internal_vmull(const qint16x8_t &x, const qint16x8_t &y, int fixed_point_position)
-{
- const qint32x4x2_t r =
- {
- {
- vmull_qs16(vget_low_s16(x), vget_low_s16(y), fixed_point_position),
- vmull_qs16(vget_high_s16(x), vget_high_s16(y), fixed_point_position),
- }
- };
- return r;
-}
-
-inline qint32x4x2_t internal_vmlal(const qint32x4x2_t &x, const qint16x8_t &y, const qint16x8_t &z, int fixed_point_position)
-{
- const qint32x4x2_t r =
- {
- {
- vqmlal_qs16(x.val[0], vget_low_s16(y), vget_low_s16(z), fixed_point_position),
- vqmlal_qs16(x.val[1], vget_high_s16(y), vget_high_s16(z), fixed_point_position)
- }
- };
- return r;
-}
-
constexpr int small_tensor_size_optim = 8;
inline bool run_optim_small_tensor_info(const ITensorInfo *t)
{
@@ -355,21 +234,20 @@ public:
static void convolve(const Window &window, unsigned int num_elems_read_per_iteration, unsigned int num_elems_written_per_iteration,
const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info)
{
- const int input_stride_x = input->info()->strides_in_bytes().x();
- const int input_stride_y = input->info()->strides_in_bytes().y();
- const int input_stride_z = input->info()->strides_in_bytes().z();
- const int output_stride_y = output->info()->strides_in_bytes().y();
- const int output_stride_z = output->info()->strides_in_bytes().z();
- const int kernel_stride_z = weights->info()->strides_in_bytes().z();
- const int kernel_stride_w = weights->info()->strides_in_bytes()[3];
- const int output_w = output->info()->dimension(0);
- const int output_h = output->info()->dimension(1);
- const int range_z = window.z().end() - window.z().start();
- const int kernel_depth = weights->info()->dimension(Window::DimZ);
- const unsigned int conv_stride_y = std::get<1>(conv_info.stride());
- const unsigned int conv_pad_left = conv_info.pad_left();
- const unsigned int conv_pad_top = conv_info.pad_top();
- const int fixed_point_position = input->info()->fixed_point_position();
+ const int input_stride_x = input->info()->strides_in_bytes().x();
+ const int input_stride_y = input->info()->strides_in_bytes().y();
+ const int input_stride_z = input->info()->strides_in_bytes().z();
+ const int output_stride_y = output->info()->strides_in_bytes().y();
+ const int output_stride_z = output->info()->strides_in_bytes().z();
+ const int kernel_stride_z = weights->info()->strides_in_bytes().z();
+ const int kernel_stride_w = weights->info()->strides_in_bytes()[3];
+ const int output_w = output->info()->dimension(0);
+ const int output_h = output->info()->dimension(1);
+ const int range_z = window.z().end() - window.z().start();
+ const int kernel_depth = weights->info()->dimension(Window::DimZ);
+ const unsigned int conv_stride_y = std::get<1>(conv_info.stride());
+ const unsigned int conv_pad_left = conv_info.pad_left();
+ const unsigned int conv_pad_top = conv_info.pad_top();
// setup output window for the iterator
Window window_out = window;
@@ -414,7 +292,7 @@ public:
auto p_out = reinterpret_cast<T2 *>(p_out_base + oh * output_stride_y);
for(int ow = 0; ow < output_w; ow += num_elems_written_per_iteration, in_val += num_elems_read_per_iteration, p_out += num_elems_written_per_iteration)
{
- internal_vst1q(p_out, internal_vmull(vk, internal_vld1q<stridex>(in_val), fixed_point_position));
+ internal_vst1q(p_out, internal_vmull(vk, internal_vld1q<stridex>(in_val)));
}
}
}
@@ -431,7 +309,7 @@ public:
auto p_out = reinterpret_cast<T2 *>(p_out_base + oh * output_stride_y);
for(int ow = 0; ow < output_w; ow += num_elems_written_per_iteration, in_val += num_elems_read_per_iteration, p_out += num_elems_written_per_iteration)
{
- internal_vst1q(p_out, internal_vmlal(internal_vld1q<1>(p_out), vk, internal_vld1q<stridex>(in_val), fixed_point_position));
+ internal_vst1q(p_out, internal_vmlal(internal_vld1q<1>(p_out), vk, internal_vld1q<stridex>(in_val)));
}
}
}
@@ -469,7 +347,7 @@ void accumulate_results<3>(float16_t *buffer, const float16x8x2_t &values)
template <unsigned int stridex>
float32x4x2_t convolve_5x5(const float *in_0, const float *in_1, const float *in_2, const float *in_3, const float *in_4,
- const float *m0, const float *m1, const float *m2, const float *m3, const float *m4, int fixed_point_position);
+ const float *m0, const float *m1, const float *m2, const float *m3, const float *m4);
inline float32x4x3_t load_matrix_hi(const float *const m0, const float *const m1, const float *const m2)
{
@@ -511,9 +389,8 @@ inline float32x4x3_t load_input(const float *const in)
template <>
inline float32x4x2_t convolve_5x5<1>(const float *in_0, const float *in_1, const float *in_2, const float *in_3, const float *in_4,
- const float *m0, const float *m1, const float *m2, const float *m3, const float *m4, int fixed_point_position)
+ const float *m0, const float *m1, const float *m2, const float *m3, const float *m4)
{
- ARM_COMPUTE_UNUSED(fixed_point_position);
const float32x4x3_t vin0 = load_input(in_0);
const float32x4x3_t vin1 = load_input(in_1);
const float32x4x3_t vin2 = load_input(in_2);
@@ -601,10 +478,9 @@ inline float32x4x2_t convolve_5x5<1>(const float *in_0, const float *in_1, const
template <>
inline float32x4x2_t convolve_5x5<2>(const float *in_0, const float *in_1, const float *in_2, const float *in_3, const float *in_4,
- const float *m0, const float *m1, const float *m2, const float *m3, const float *m4, int fixed_point_position)
+ const float *m0, const float *m1, const float *m2, const float *m3, const float *m4)
{
- ARM_COMPUTE_UNUSED(fixed_point_position);
- float32x4x2_t out = convolve_5x5<1>(in_0, in_1, in_2, in_3, in_4, m0, m1, m2, m3, m4, fixed_point_position);
+ float32x4x2_t out = convolve_5x5<1>(in_0, in_1, in_2, in_3, in_4, m0, m1, m2, m3, m4);
out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 2), out.val[0], 1);
out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 0), out.val[0], 2);
out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 2), out.val[0], 3);
@@ -613,9 +489,9 @@ inline float32x4x2_t convolve_5x5<2>(const float *in_0, const float *in_1, const
template <>
inline float32x4x2_t convolve_5x5<3>(const float *in_0, const float *in_1, const float *in_2, const float *in_3, const float *in_4,
- const float *m0, const float *m1, const float *m2, const float *m3, const float *m4, int fixed_point_position)
+ const float *m0, const float *m1, const float *m2, const float *m3, const float *m4)
{
- float32x4x2_t out = convolve_5x5<1>(in_0, in_1, in_2, in_3, in_4, m0, m1, m2, m3, m4, fixed_point_position);
+ float32x4x2_t out = convolve_5x5<1>(in_0, in_1, in_2, in_3, in_4, m0, m1, m2, m3, m4);
out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 3), out.val[0], 1);
return out;
}
@@ -642,28 +518,6 @@ void accumulate_results<3>(float *buffer, const float32x4x2_t &values)
vst1_f32(buffer, vadd_f32(vld1_f32(buffer), vget_low_f32(values.val[0])));
}
-template <unsigned int stridex>
-void accumulate_results(qint16_t *buffer, const qint16x8x2_t &values);
-
-template <>
-void accumulate_results<1>(qint16_t *buffer, const qint16x8x2_t &values)
-{
- vst1q_qs16(buffer, vqaddq_qs16(vld1q_qs16(buffer), values.val[0]));
- vst1q_qs16(buffer + 8, vqaddq_qs16(vld1q_qs16(buffer + 8), values.val[1]));
-}
-
-template <>
-void accumulate_results<2>(qint16_t *buffer, const qint16x8x2_t &values)
-{
- vst1q_qs16(buffer, vqaddq_qs16(vld1q_qs16(buffer), values.val[0]));
-}
-
-template <>
-void accumulate_results<3>(qint16_t *buffer, const qint16x8x2_t &values)
-{
- vst1_qs16(buffer, vqadd_qs16(vld1_qs16(buffer), vget_low_s16(values.val[0])));
-}
-
template <typename T1>
class convolver_nhwc
{
@@ -745,7 +599,7 @@ public:
const auto we_addr = reinterpret_cast<const T1 *>(we_addr_base1 + x * kernel_stride_x);
const auto we_values = internal_vld1q<1>(we_addr);
- out_values = internal_vmlal(out_values, in_values, we_values, 0);
+ out_values = internal_vmlal(out_values, in_values, we_values);
}
out_val += out_values[0];
@@ -784,24 +638,23 @@ public:
const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info)
{
ARM_COMPUTE_UNUSED(num_elems_read_per_iteration);
- const int input_stride_x = input->info()->strides_in_bytes().x();
- const int input_stride_y = input->info()->strides_in_bytes().y();
- const int input_stride_z = input->info()->strides_in_bytes().z();
- const int output_stride_y = output->info()->strides_in_bytes().y();
- const int output_stride_z = output->info()->strides_in_bytes().z();
- const int kernel_stride_x = weights->info()->strides_in_bytes().x();
- const int kernel_stride_y = weights->info()->strides_in_bytes().y();
- const int kernel_stride_z = weights->info()->strides_in_bytes().z();
- const int kernel_stride_w = weights->info()->strides_in_bytes()[3];
- const int output_w = output->info()->dimension(0);
- const int output_h = output->info()->dimension(1);
- const int num_planes_z = window.z().end() - window.z().start();
- const int delta_input = get_input_num_elems_processed<stridex>(num_elems_written_per_iteration);
- const int kernel_depth = weights->info()->dimension(Window::DimZ);
- const unsigned int conv_stride_y = std::get<1>(conv_info.stride());
- const unsigned int conv_pad_left = conv_info.pad_left();
- const unsigned int conv_pad_top = conv_info.pad_top();
- const int fixed_point_position = input->info()->fixed_point_position();
+ const int input_stride_x = input->info()->strides_in_bytes().x();
+ const int input_stride_y = input->info()->strides_in_bytes().y();
+ const int input_stride_z = input->info()->strides_in_bytes().z();
+ const int output_stride_y = output->info()->strides_in_bytes().y();
+ const int output_stride_z = output->info()->strides_in_bytes().z();
+ const int kernel_stride_x = weights->info()->strides_in_bytes().x();
+ const int kernel_stride_y = weights->info()->strides_in_bytes().y();
+ const int kernel_stride_z = weights->info()->strides_in_bytes().z();
+ const int kernel_stride_w = weights->info()->strides_in_bytes()[3];
+ const int output_w = output->info()->dimension(0);
+ const int output_h = output->info()->dimension(1);
+ const int num_planes_z = window.z().end() - window.z().start();
+ const int delta_input = get_input_num_elems_processed<stridex>(num_elems_written_per_iteration);
+ const int kernel_depth = weights->info()->dimension(Window::DimZ);
+ const unsigned int conv_stride_y = std::get<1>(conv_info.stride());
+ const unsigned int conv_pad_left = conv_info.pad_left();
+ const unsigned int conv_pad_top = conv_info.pad_top();
// setup output window for the iterator
Window window_out = window;
@@ -864,7 +717,7 @@ public:
for(int ow = 0; ow < output_w; ow += num_elems_written_per_iteration,
in_top += delta_input, in_mid += delta_input, in_low += delta_input, p_out += num_elems_written_per_iteration)
{
- auto vres = convolve_3x3<stridex>(in_top, in_mid, in_low, vk_r0, vk_r1, vk_r2, fixed_point_position);
+ auto vres = convolve_3x3<stridex>(in_top, in_mid, in_low, vk_r0, vk_r1, vk_r2);
store_results<stridex>(p_out, vres);
}
}
@@ -889,7 +742,7 @@ public:
for(int ow = 0; ow < output_w; ow += num_elems_written_per_iteration,
in_top += delta_input, in_mid += delta_input, in_low += delta_input, p_out += num_elems_written_per_iteration)
{
- auto vres = convolve_3x3<stridex>(in_top, in_mid, in_low, vk_r0, vk_r1, vk_r2, fixed_point_position);
+ auto vres = convolve_3x3<stridex>(in_top, in_mid, in_low, vk_r0, vk_r1, vk_r2);
accumulate_results<stridex>(p_out, vres);
}
}
@@ -908,24 +761,23 @@ public:
const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info)
{
ARM_COMPUTE_UNUSED(num_elems_read_per_iteration);
- const int input_stride_x = input->info()->strides_in_bytes().x();
- const int input_stride_y = input->info()->strides_in_bytes().y();
- const int input_stride_z = input->info()->strides_in_bytes().z();
- const int output_stride_y = output->info()->strides_in_bytes().y();
- const int output_stride_z = output->info()->strides_in_bytes().z();
- const int kernel_stride_x = weights->info()->strides_in_bytes().x();
- const int kernel_stride_y = weights->info()->strides_in_bytes().y();
- const int kernel_stride_z = weights->info()->strides_in_bytes().z();
- const int kernel_stride_w = weights->info()->strides_in_bytes()[3];
- const int output_w = output->info()->dimension(0);
- const int output_h = output->info()->dimension(1);
- const int num_planes_z = window.z().end() - window.z().start();
- const int delta_input = get_input_num_elems_processed<stridex>(num_elems_written_per_iteration);
- const int kernel_depth = weights->info()->dimension(Window::DimZ);
- const unsigned int conv_stride_y = std::get<1>(conv_info.stride());
- const unsigned int conv_pad_left = conv_info.pad_left();
- const unsigned int conv_pad_top = conv_info.pad_top();
- const int fixed_point_position = input->info()->fixed_point_position();
+ const int input_stride_x = input->info()->strides_in_bytes().x();
+ const int input_stride_y = input->info()->strides_in_bytes().y();
+ const int input_stride_z = input->info()->strides_in_bytes().z();
+ const int output_stride_y = output->info()->strides_in_bytes().y();
+ const int output_stride_z = output->info()->strides_in_bytes().z();
+ const int kernel_stride_x = weights->info()->strides_in_bytes().x();
+ const int kernel_stride_y = weights->info()->strides_in_bytes().y();
+ const int kernel_stride_z = weights->info()->strides_in_bytes().z();
+ const int kernel_stride_w = weights->info()->strides_in_bytes()[3];
+ const int output_w = output->info()->dimension(0);
+ const int output_h = output->info()->dimension(1);
+ const int num_planes_z = window.z().end() - window.z().start();
+ const int delta_input = get_input_num_elems_processed<stridex>(num_elems_written_per_iteration);
+ const int kernel_depth = weights->info()->dimension(Window::DimZ);
+ const unsigned int conv_stride_y = std::get<1>(conv_info.stride());
+ const unsigned int conv_pad_left = conv_info.pad_left();
+ const unsigned int conv_pad_top = conv_info.pad_top();
// setup output window for the iterator
Window window_out = window;
@@ -976,7 +828,7 @@ public:
for(int ow = 0; ow < output_w; ow += num_elems_written_per_iteration,
in_0 += delta_input, in_1 += delta_input, in_2 += delta_input, in_3 += delta_input, in_4 += delta_input, p_out += num_elems_written_per_iteration)
{
- auto vres = convolve_5x5<stridex>(in_0, in_1, in_2, in_3, in_4, ptr_k_r0, ptr_k_r1, ptr_k_r2, ptr_k_r3, ptr_k_r4, fixed_point_position);
+ auto vres = convolve_5x5<stridex>(in_0, in_1, in_2, in_3, in_4, ptr_k_r0, ptr_k_r1, ptr_k_r2, ptr_k_r3, ptr_k_r4);
store_results<stridex>(p_out, vres);
}
}
@@ -1001,7 +853,7 @@ public:
for(int ow = 0; ow < output_w; ow += num_elems_written_per_iteration,
in_0 += delta_input, in_1 += delta_input, in_2 += delta_input, in_3 += delta_input, in_4 += delta_input, p_out += num_elems_written_per_iteration)
{
- auto vres = convolve_5x5<stridex>(in_0, in_1, in_2, in_3, in_4, ptr_k_r0, ptr_k_r1, ptr_k_r2, ptr_k_r3, ptr_k_r4, fixed_point_position);
+ auto vres = convolve_5x5<stridex>(in_0, in_1, in_2, in_3, in_4, ptr_k_r0, ptr_k_r1, ptr_k_r2, ptr_k_r3, ptr_k_r4);
accumulate_results<stridex>(p_out, vres);
}
}
@@ -1120,7 +972,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
const DataLayout data_layout = input->data_layout();
@@ -1140,11 +992,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*input, *weights, conv_info);
DataType data_type = input->data_type();
- if(is_data_type_fixed_point(data_type))
- {
- // Promote data type in case of fixed point
- data_type = ((data_type == DataType::QS8) ? DataType::QS16 : DataType::QS32);
- }
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON(output->data_type() != data_type);
@@ -1180,11 +1027,9 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
{
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- case DataType::QS8:
- case DataType::QS16:
num_elems_written_per_iteration = 8;
break;
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::F32:
if(run_optim_small_tensor_info(input))
{
@@ -1215,13 +1060,11 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- case DataType::QS8:
- case DataType::QS16:
num_weight_elems_read_per_row = 8 + kernel_size - 1;
num_elems_read_per_iteration = 24;
num_elems_written_per_iteration = 32 >> conv_stride_x;
break;
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
ARM_COMPUTE_ERROR("Data type not supported.");
break;
@@ -1315,14 +1158,8 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
DataType data_type = input->info()->data_type();
- if(is_data_type_fixed_point(data_type))
- {
- // Promote data type in case of fixed point
- data_type = ((data_type == DataType::QS8) ? DataType::QS16 : DataType::QS32);
- }
-
// Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, data_type, input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), output_shape, 1, data_type);
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), output->info(), conv_info));
@@ -1371,12 +1208,6 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo
{
switch(_input->info()->data_type())
{
- case DataType::QS8:
- convolve_1x1<qint8_t, qint16_t>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
- break;
- case DataType::QS16:
- convolve_1x1<qint16_t, qint32_t>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
- break;
case DataType::F32:
convolve_1x1<float, float>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
break;
@@ -1395,9 +1226,6 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo
{
switch(_input->info()->data_type())
{
- case DataType::QS8:
- convolve_3x3<qint8_t, qint16_t>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
- break;
case DataType::F32:
convolve_3x3<float, float>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
break;
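The hunks above drop the trailing fixed_point_position argument from every convolve_* call. For readers unfamiliar with the QS8/QS16 formats these paths supported, here is a minimal, self-contained scalar sketch of the Qn fixed-point multiply-accumulate that the argument parameterised; it is illustrative only, and the names and rounding behaviour are not the library's.

    #include <algorithm>
    #include <cstdint>

    // acc + a*b in Qn format: the product of two Qn values carries 2n fractional
    // bits, so shifting right by n (the fixed_point_position) restores Qn before
    // the saturating add back into an 8-bit accumulator.
    inline int8_t sqmla_qs8(int8_t acc, int8_t a, int8_t b, int fixed_point_position)
    {
        const int32_t prod = (static_cast<int32_t>(a) * static_cast<int32_t>(b)) >> fixed_point_position;
        const int32_t sum  = static_cast<int32_t>(acc) + prod;
        return static_cast<int8_t>(std::min<int32_t>(std::max<int32_t>(sum, INT8_MIN), INT8_MAX));
    }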
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
index 37a3804289..e4cd4d0465 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
@@ -45,22 +45,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8,
- DataType::QS16, DataType::F16,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8,
+ DataType::F16,
DataType::QS32, DataType::S32, DataType::F32);
if(bias != nullptr)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::QS32, DataType::S32, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::F16, DataType::QS32, DataType::S32, DataType::F32);
- if(is_data_type_fixed_point(input->data_type()))
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QS8 && bias->data_type() != DataType::QS8, "Wrong data type for bias");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QS16 && bias->data_type() != DataType::QS8, "Wrong data type for bias");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QS32 && bias->data_type() != DataType::QS16, "Wrong data type for bias");
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, bias);
- }
- else if(is_data_type_quantized_asymmetric(input->data_type()))
+ if(is_data_type_quantized_asymmetric(input->data_type()))
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
}
@@ -80,17 +73,10 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
// Checks performed when output is configured
if((output != nullptr) && (output->total_size() != 0))
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- if(is_data_type_fixed_point(input->data_type()))
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QS8 && output->data_type() != DataType::QS8, "Wrong data type for output");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QS16 && output->data_type() != DataType::QS8, "Wrong data type for output");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QS32 && output->data_type() != DataType::QS16, "Wrong data type for output");
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
- }
- else if(is_data_type_quantized_asymmetric(output->data_type()))
+ if(is_data_type_quantized_asymmetric(output->data_type()))
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S32 && output->data_type() != DataType::QASYMM8, "Wrong data type for bias");
}
@@ -168,81 +154,24 @@ inline float32x4_t internal_vld1q(const float *in)
{
return vld1q_f32(in);
}
-inline qint8x16_t internal_vld1q(const qint8_t *in)
-{
- return vld1q_qs8(in);
-}
-inline qint16x8_t internal_vld1q(const qint16_t *in)
-{
- return vld1q_qs16(in);
-}
-inline qint32x4_t internal_vld1q(const qint32_t *in)
-{
- return vld1q_s32(in);
-}
// Internal store
inline void internal_vst1q(float *p, const float32x4_t &v)
{
vst1q_f32(p, v);
}
-inline void internal_vst1q(qint8_t *p, const qint8x16_t &v)
-{
- vst1q_qs8(p, v);
-}
-inline void internal_vst1q(qint8_t *p, const qint16x8_t &v)
-{
- vst1_qs8(p, vqmovn_s16(v));
-}
-inline void internal_vst1q(qint16_t *p, const qint16x8_t &v)
-{
- vst1q_qs16(p, v);
-}
-inline void internal_vst1q(qint32_t *p, const qint32x4_t &v)
-{
- vst1q_s32(p, v);
-}
-
-inline void internal_vst1q(qint16_t *p, const qint32x4_t &v)
-{
- vst1_qs16(p, vqmovn_qs32(v));
-}
// Internal vdup
inline float32x4_t internal_vdupq_n(float v)
{
return vdupq_n_f32(v);
}
-inline qint8x16_t internal_vdupq_n(qint8_t v)
-{
- return vdupq_n_qs8(v);
-}
-inline qint16x8_t internal_vdupq_n(qint16_t v)
-{
- return vdupq_n_qs16(v);
-}
-inline qint32x4_t internal_vdupq_n(qint32_t v)
-{
- return vdupq_n_qs32(v);
-}
// Internal vadd
inline float32x4_t internal_vqaddq(const float32x4_t &x, const float32x4_t &y)
{
return vaddq_f32(x, y);
}
-inline qint8x16_t internal_vqaddq(const qint8x16_t &x, const qint8x16_t &y)
-{
- return vqaddq_qs8(x, y);
-}
-inline qint16x8_t internal_vqaddq(const qint16x8_t &x, const qint16x8_t &y)
-{
- return vqaddq_qs16(x, y);
-}
-inline qint32x4_t internal_vqaddq(const qint32x4_t &x, const qint32x4_t &y)
-{
- return vqaddq_qs32(x, y);
-}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
inline float16x8_t internal_vld1q(const float16_t *in)
@@ -494,39 +423,6 @@ void NEDirectConvolutionLayerOutputStageKernel::configure(ITensor *input, const
{
switch(input->info()->data_type())
{
- case DataType::QS8:
- {
- if(bias == nullptr)
- {
- _func = (output == nullptr) ? &output_stage<qint8_t, qint8_t, true, false> : &output_stage<qint8_t, qint8_t, false, false>;
- }
- else
- {
- _func = (output == nullptr) ? &output_stage<qint8_t, qint8_t, true, true> : &output_stage<qint8_t, qint8_t, false, true>;
- }
- break;
- }
- case DataType::QS16:
- {
- if(bias != nullptr && bias->info()->data_type() == DataType::QS8)
- {
- _func = (output == nullptr) ? &output_stage<qint16_t, qint8_t, true, true> : &output_stage<qint16_t, qint8_t, false, true>;
- }
- else if(bias == nullptr)
- {
- _func = (output == nullptr) ? &output_stage<qint16_t, qint8_t, true, false> : &output_stage<qint16_t, qint8_t, false, false>;
- }
- else
- {
- ARM_COMPUTE_ERROR("Not implemented");
- }
- break;
- }
- case DataType::QS32:
- {
- _func = (output == nullptr) ? &output_stage<qint32_t, qint16_t, true, true> : &output_stage<qint32_t, qint16_t, false, true>;
- break;
- }
case DataType::S32:
{
_func = (bias == nullptr) ? &output_stage<int32_t, uint8_t, false, false> : &output_stage<int32_t, uint8_t, false, true>;
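With the qint overloads of internal_vld1q / internal_vst1q / internal_vdupq_n / internal_vqaddq removed, the generic output stage resolves to the plain float intrinsics on the F32 path. A compressed, illustrative sketch of what one per-vector step amounts to (not the kernel's actual loop, which also handles windows, strides and the optional bias/output tensors):

    #include <arm_neon.h>

    // One 4-wide step of "accumulator + bias" on the F32 path.
    void add_bias_f32x4(const float *in, float bias, float *out)
    {
        const float32x4_t v = vld1q_f32(in);      // internal_vld1q(const float*)  -> vld1q_f32
        const float32x4_t b = vdupq_n_f32(bias);  // internal_vdupq_n(float)       -> vdupq_n_f32
        vst1q_f32(out, vaddq_f32(v, b));          // internal_vqaddq on F32 is a plain vaddq_f32
    }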
diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp
index 747b8b1bfe..3d08cafa93 100644
--- a/src/core/NEON/kernels/NEFillBorderKernel.cpp
+++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp
@@ -105,8 +105,8 @@ NEFillBorderKernel::NEFillBorderKernel()
void NEFillBorderKernel::configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::U8, DataType::QS8, DataType::QASYMM8,
- DataType::QS16, DataType::U16, DataType::S16,
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::U8, DataType::QASYMM8,
+ DataType::U16, DataType::S16,
DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
@@ -147,7 +147,6 @@ void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
case DataType::U8:
fill_constant_value_single_channel<uint8_t>(window);
break;
- case DataType::QS8:
case DataType::S8:
fill_constant_value_single_channel<int8_t>(window);
break;
@@ -155,7 +154,6 @@ void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
fill_constant_value_single_channel<uint16_t>(window);
break;
case DataType::S16:
- case DataType::QS16:
fill_constant_value_single_channel<int16_t>(window);
break;
case DataType::U32:
@@ -192,7 +190,6 @@ void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
case DataType::U8:
fill_replicate_single_channel<uint8_t>(window);
break;
- case DataType::QS8:
case DataType::S8:
fill_replicate_single_channel<int8_t>(window);
break;
@@ -200,7 +197,6 @@ void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
fill_replicate_single_channel<uint16_t>(window);
break;
case DataType::S16:
- case DataType::QS16:
fill_replicate_single_channel<int16_t>(window);
break;
case DataType::U32:
diff --git a/src/core/NEON/kernels/NEFloorKernel.cpp b/src/core/NEON/kernels/NEFloorKernel.cpp
index 72b652d5dc..872ac2661e 100644
--- a/src/core/NEON/kernels/NEFloorKernel.cpp
+++ b/src/core/NEON/kernels/NEFloorKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ void NEFloorKernel::configure(const ITensor *input, ITensor *output)
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Auto initialize output
- auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp
index 12755a45f8..6519a39b9c 100644
--- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp
@@ -44,11 +44,10 @@ namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::U8, DataType::S8,
- DataType::QS16, DataType::U16, DataType::S16, DataType::U32, DataType::S32,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::U8, DataType::S8,
+ DataType::U16, DataType::S16, DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
if(output->total_size() != 0)
{
@@ -57,7 +56,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
output_shape.set(1, std::ceil(input->dimension(1) / 4.0f));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
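The ceil(dimension(1) / 4.0f) check above follows from the 4x4 interleave this kernel performs. A hypothetical scalar model of one row-block, for orientation only (the real kernel works on the tensor window with NEON loads/stores):

    #include <vector>

    // Emits element x of each of 4 consecutive rows contiguously, so a WxH
    // input becomes roughly (W*4) x ceil(H/4) -- matching the shape check above.
    std::vector<float> interleave_4x4_block(const float *rows, int width)
    {
        std::vector<float> out;
        out.reserve(4 * static_cast<size_t>(width));
        for(int x = 0; x < width; ++x)
        {
            for(int r = 0; r < 4; ++r)
            {
                out.push_back(rows[r * width + x]);
            }
        }
        return out;
    }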
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
index cab3c7a58f..421a6f0ef9 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
@@ -43,9 +43,8 @@ namespace
{
inline Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(biases, accum);
ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != accum->dimension(0));
@@ -161,33 +160,6 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadI
break;
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- case DataType::QS8:
- {
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const qint8x16_t accum = vld1q_qs8(reinterpret_cast<const qint8_t *>(in0_out.ptr()));
- const qint8x16_t biases = vld1q_qs8(reinterpret_cast<const qint8_t *>(in1.ptr()));
-
- vst1q_qs8(reinterpret_cast<qint8_t *>(in0_out.ptr()), vqaddq_qs8(accum, biases));
- },
- in0_out, in1);
- break;
- }
- case DataType::QS16:
- {
- execute_window_loop(window, [&](const Coordinates & id)
- {
- qint16x8x2_t accum = vld2q_s16(reinterpret_cast<const qint16_t *>(in0_out.ptr()));
- const qint16x8x2_t biases = vld2q_s16(reinterpret_cast<const qint16_t *>(in1.ptr()));
-
- accum.val[0] = vqaddq_qs16(accum.val[0], biases.val[0]);
- accum.val[1] = vqaddq_qs16(accum.val[1], biases.val[1]);
-
- vst2q_s16(reinterpret_cast<qint16_t *>(in0_out.ptr()), accum);
- },
- in0_out, in1);
- break;
- }
default:
ARM_COMPUTE_ERROR("Data type not supported");
break;
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
index dfba74355b..d02504329a 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -91,54 +91,6 @@ void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &wi
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-void matrix_addition_qs8(const ITensor *input, ITensor *output, const Window &window, float beta)
-{
- const int fixed_point_position = input->info()->fixed_point_position();
- const qint8x16_t beta_qs8 = vdupq_n_qs8(sqcvt_qs8_f32(beta, fixed_point_position));
-
- Iterator in(input, window);
- Iterator out(output, window);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto in_ptr = reinterpret_cast<const qint8_t *>(in.ptr());
- const auto out_ptr = reinterpret_cast<qint8_t *>(out.ptr());
-
- qint8x16_t alpha_ab = vld1q_qs8(out_ptr);
- const qint8x16_t c = vld1q_qs8(in_ptr);
-
- // Multiply matrix C by its weight and accumulate
- alpha_ab = vqmlaq_qs8(alpha_ab, c, beta_qs8, fixed_point_position);
-
- vst1q_qs8(out_ptr, alpha_ab);
- },
- in, out);
-}
-
-void matrix_addition_qs16(const ITensor *input, ITensor *output, const Window &window, float beta)
-{
- const int fixed_point_position = input->info()->fixed_point_position();
- const qint16x8_t beta_qs16 = vdupq_n_qs16(sqcvt_qs16_f32(beta, fixed_point_position));
-
- Iterator in(input, window);
- Iterator out(output, window);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto in_ptr = reinterpret_cast<const qint16_t *>(in.ptr());
- const auto out_ptr = reinterpret_cast<qint16_t *>(out.ptr());
-
- qint16x8x2_t alpha_ab = vld2q_s16(out_ptr);
- const qint16x8x2_t c = vld2q_s16(in_ptr);
-
- // Multiply matrix C by its weight and accumulate
- alpha_ab.val[0] = vqmlaq_qs16(alpha_ab.val[0], c.val[0], beta_qs16, fixed_point_position);
- alpha_ab.val[1] = vqmlaq_qs16(alpha_ab.val[1], c.val[1], beta_qs16, fixed_point_position);
-
- vst2q_s16(out_ptr, alpha_ab);
- },
- in, out);
-}
} // namespace
NEGEMMMatrixAdditionKernel::NEGEMMMatrixAdditionKernel()
@@ -148,10 +100,9 @@ NEGEMMMatrixAdditionKernel::NEGEMMMatrixAdditionKernel()
void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output, float beta)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0));
ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1));
@@ -160,12 +111,6 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output
case DataType::F32:
_func = &matrix_addition_f32;
break;
- case DataType::QS8:
- _func = &matrix_addition_qs8;
- break;
- case DataType::QS16:
- _func = &matrix_addition_qs16;
- break;
case DataType::F16:
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
_func = &matrix_addition_f16;
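The deleted matrix_addition_qs8/qs16 bodies make explicit what this kernel computes: the beta * C term of D = alpha * A * B + beta * C, accumulated in place into the output. A minimal illustrative F32 step under assumed names (this is not the library's matrix_addition_f32):

    #include <arm_neon.h>

    // out += beta * in, one 4-wide step: vmlaq_f32(acc, c, b) == acc + c * b.
    void beta_accumulate_f32x4(const float *in, float *out, float beta)
    {
        const float32x4_t b = vdupq_n_f32(beta);
        const float32x4_t c = vld1q_f32(in);
        vst1q_f32(out, vmlaq_f32(vld1q_f32(out), c, b));
    }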
diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
index 69b052a9bd..196398a2de 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
@@ -356,263 +356,6 @@ void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT
}
template <bool multiply_alpha>
-void vector_matrix_multiply_qs8(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info, float alpha)
-{
- const auto width_matrix_b = static_cast<int>(output->info()->dimension(0));
- const auto in_b_stride = static_cast<int>(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type()));
- const auto num_elems_vec_a = static_cast<int>(input0->info()->dimension(0));
- const int fixed_point_position = input0->info()->fixed_point_position();
-
- // The implementation computes 32 elements per iteration
- const int window_start_x = 32 * info.thread_id;
- const int window_step_x = 32 * info.num_threads;
- // Make sure (window_end_x - window_start_x) is a multiple of window_step_x
- const int window_end_x = ceil_to_multiple(width_matrix_b - window_start_x, window_step_x) + window_start_x;
-
- Window win_out(window);
- win_out.set(Window::DimX, Window::Dimension(window_start_x, window_end_x, window_step_x));
- win_out.set(Window::DimY, Window::Dimension(0, 1, 1));
-
- Window win_a(window);
- win_a.set(Window::DimX, Window::Dimension(0, 0, 0));
- win_a.set(Window::DimY, Window::Dimension(0, 0, 0));
-
- Window win_b;
- // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A more than 2
- // This scenario can happen when the the matrix multiplication is used to perform a convolution operation
- if(input1->info()->num_dimensions() >= 3)
- {
- win_b = window;
- }
- win_b.set(Window::DimX, Window::Dimension(window_start_x, window_end_x, window_step_x));
- win_b.set(Window::DimY, Window::Dimension(0, 1, 1));
-
- Iterator ina(input0, win_a);
- Iterator inb(input1, win_b);
- Iterator out(output, win_out);
-
- execute_window_loop(win_out, [&](const Coordinates & id)
- {
- if(id.x() > width_matrix_b)
- {
- return;
- }
-
- // Reset accumulators
- qint16x8_t acc00_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc01_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc02_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc03_qs16 = vdupq_n_qs16(0);
-
- auto vec_a = reinterpret_cast<const qint8_t *>(ina.ptr());
- auto matrix_b = reinterpret_cast<const qint8_t *>(inb.ptr());
-
- auto vec_a_end_addr = vec_a + num_elems_vec_a;
- for(; vec_a <= (vec_a_end_addr - 2);)
- {
- const qint8x8_t a0 = vld1_dup_qs8(vec_a + 0);
- const qint8x8_t a1 = vld1_dup_qs8(vec_a + 1);
-
- const qint8x8_t b00 = vld1_qs8(matrix_b + 0 + 0 * in_b_stride);
- const qint8x8_t b01 = vld1_qs8(matrix_b + 8 + 0 * in_b_stride);
- const qint8x8_t b02 = vld1_qs8(matrix_b + 16 + 0 * in_b_stride);
- const qint8x8_t b03 = vld1_qs8(matrix_b + 24 + 0 * in_b_stride);
- const qint8x8_t b10 = vld1_qs8(matrix_b + 0 + 1 * in_b_stride);
- const qint8x8_t b11 = vld1_qs8(matrix_b + 8 + 1 * in_b_stride);
- const qint8x8_t b12 = vld1_qs8(matrix_b + 16 + 1 * in_b_stride);
- const qint8x8_t b13 = vld1_qs8(matrix_b + 24 + 1 * in_b_stride);
-
- // First accumulation
- acc00_qs16 = vqmlal_qs8(acc00_qs16, b00, a0, fixed_point_position);
- acc01_qs16 = vqmlal_qs8(acc01_qs16, b01, a0, fixed_point_position);
- acc02_qs16 = vqmlal_qs8(acc02_qs16, b02, a0, fixed_point_position);
- acc03_qs16 = vqmlal_qs8(acc03_qs16, b03, a0, fixed_point_position);
-
- // Second accumulation
- acc00_qs16 = vqmlal_qs8(acc00_qs16, b10, a1, fixed_point_position);
- acc01_qs16 = vqmlal_qs8(acc01_qs16, b11, a1, fixed_point_position);
- acc02_qs16 = vqmlal_qs8(acc02_qs16, b12, a1, fixed_point_position);
- acc03_qs16 = vqmlal_qs8(acc03_qs16, b13, a1, fixed_point_position);
-
- vec_a += 2;
- matrix_b += 2 * in_b_stride;
- }
-
- for(; vec_a < vec_a_end_addr;)
- {
- const qint8x8_t a0 = vld1_dup_qs8(vec_a);
-
- const qint8x8_t b00 = vld1_qs8(matrix_b + 0);
- const qint8x8_t b01 = vld1_qs8(matrix_b + 8);
- const qint8x8_t b02 = vld1_qs8(matrix_b + 16);
- const qint8x8_t b03 = vld1_qs8(matrix_b + 24);
-
- acc00_qs16 = vqmlal_qs8(acc00_qs16, b00, a0, fixed_point_position);
- acc01_qs16 = vqmlal_qs8(acc01_qs16, b01, a0, fixed_point_position);
- acc02_qs16 = vqmlal_qs8(acc02_qs16, b02, a0, fixed_point_position);
- acc03_qs16 = vqmlal_qs8(acc03_qs16, b03, a0, fixed_point_position);
-
- vec_a += 1;
- matrix_b += in_b_stride;
- }
-
- // Convert back to qint8x8_t and saturate
- qint8x8_t acc00_qs8 = vqmovn_qs16(acc00_qs16);
- qint8x8_t acc01_qs8 = vqmovn_qs16(acc01_qs16);
- qint8x8_t acc02_qs8 = vqmovn_qs16(acc02_qs16);
- qint8x8_t acc03_qs8 = vqmovn_qs16(acc03_qs16);
-
- // Multiply by the weight of the matrix product (alpha)
- if(multiply_alpha)
- {
- const qint8x8_t alpha_qs8 = vdup_n_qs8(sqcvt_qs8_f32(alpha, fixed_point_position));
- acc00_qs8 = vqmul_qs8(acc00_qs8, alpha_qs8, fixed_point_position);
- acc01_qs8 = vqmul_qs8(acc01_qs8, alpha_qs8, fixed_point_position);
- acc02_qs8 = vqmul_qs8(acc02_qs8, alpha_qs8, fixed_point_position);
- acc03_qs8 = vqmul_qs8(acc03_qs8, alpha_qs8, fixed_point_position);
- }
-
- const auto mtx_out0 = reinterpret_cast<qint8_t *>(out.ptr());
-
- // Store 8x4 output elements
- vst1_qs8(mtx_out0 + 0, acc00_qs8);
- vst1_qs8(mtx_out0 + 8, acc01_qs8);
- vst1_qs8(mtx_out0 + 16, acc02_qs8);
- vst1_qs8(mtx_out0 + 24, acc03_qs8);
- },
- ina, inb, out);
-}
-
-template <bool multiply_alpha>
-void vector_matrix_multiply_qs16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info, float alpha)
-{
- const auto width_matrix_b = static_cast<int>(output->info()->dimension(0));
- const auto in_b_stride = static_cast<int>(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type()));
- const auto num_elems_vec_a = static_cast<int>(input0->info()->dimension(0));
- const int fixed_point_position = input0->info()->fixed_point_position();
-
- // The implementation computes 16 elements per iteration
- const int window_start_x = 16 * info.thread_id;
- const int window_step_x = 16 * info.num_threads;
- // Make sure (window_end_x - window_start_x) is a multiple of window_step_x
- const int window_end_x = ceil_to_multiple(width_matrix_b - window_start_x, window_step_x) + window_start_x;
- ARM_COMPUTE_ERROR_ON_MSG((window_end_x - window_start_x) % window_step_x, " (window_end_x - window_start_x) must be multiple of window_step_x");
-
- Window win_out(window);
- win_out.set(Window::DimX, Window::Dimension(window_start_x, window_end_x, window_step_x));
- win_out.set(Window::DimY, Window::Dimension(0, 1, 1));
-
- Window win_a(window);
- win_a.set(Window::DimX, Window::Dimension(0, 0, 0));
- win_a.set(Window::DimY, Window::Dimension(0, 0, 0));
-
- Window win_b;
- // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A more than 2
- // This scenario can happen when the the matrix multiplication is used to perform a convolution operation
- if(input1->info()->num_dimensions() >= 3)
- {
- win_b = window;
- }
- win_b.set(Window::DimX, Window::Dimension(window_start_x, window_end_x, window_step_x));
- win_b.set(Window::DimY, Window::Dimension(0, 1, 1));
-
- Iterator ina(input0, win_a);
- Iterator inb(input1, win_b);
- Iterator out(output, win_out);
-
- execute_window_loop(win_out, [&](const Coordinates & id)
- {
- if(id.x() > width_matrix_b)
- {
- return;
- }
-
- // Reset accumulators
- qint32x4_t acc00_qs32 = vdupq_n_qs32(0);
- qint32x4_t acc01_qs32 = vdupq_n_qs32(0);
- qint32x4_t acc02_qs32 = vdupq_n_qs32(0);
- qint32x4_t acc03_qs32 = vdupq_n_qs32(0);
-
- auto vec_a = reinterpret_cast<const qint16_t *>(ina.ptr());
- auto matrix_b = reinterpret_cast<const qint16_t *>(inb.ptr());
-
- auto vec_a_end_addr = vec_a + num_elems_vec_a;
- for(; vec_a <= (vec_a_end_addr - 2);)
- {
- const qint16x4_t a0 = vld1_dup_qs16(vec_a + 0);
- const qint16x4_t a1 = vld1_dup_qs16(vec_a + 1);
-
- const qint16x4_t b00 = vld1_qs16(matrix_b + 0 + 0 * in_b_stride);
- const qint16x4_t b01 = vld1_qs16(matrix_b + 4 + 0 * in_b_stride);
- const qint16x4_t b02 = vld1_qs16(matrix_b + 8 + 0 * in_b_stride);
- const qint16x4_t b03 = vld1_qs16(matrix_b + 12 + 0 * in_b_stride);
- const qint16x4_t b10 = vld1_qs16(matrix_b + 0 + 1 * in_b_stride);
- const qint16x4_t b11 = vld1_qs16(matrix_b + 4 + 1 * in_b_stride);
- const qint16x4_t b12 = vld1_qs16(matrix_b + 8 + 1 * in_b_stride);
- const qint16x4_t b13 = vld1_qs16(matrix_b + 12 + 1 * in_b_stride);
-
- // First accumulation
- acc00_qs32 = vqmlal_qs16(acc00_qs32, b00, a0, fixed_point_position);
- acc01_qs32 = vqmlal_qs16(acc01_qs32, b01, a0, fixed_point_position);
- acc02_qs32 = vqmlal_qs16(acc02_qs32, b02, a0, fixed_point_position);
- acc03_qs32 = vqmlal_qs16(acc03_qs32, b03, a0, fixed_point_position);
-
- // Second accumulation
- acc00_qs32 = vqmlal_qs16(acc00_qs32, b10, a1, fixed_point_position);
- acc01_qs32 = vqmlal_qs16(acc01_qs32, b11, a1, fixed_point_position);
- acc02_qs32 = vqmlal_qs16(acc02_qs32, b12, a1, fixed_point_position);
- acc03_qs32 = vqmlal_qs16(acc03_qs32, b13, a1, fixed_point_position);
-
- vec_a += 2;
- matrix_b += 2 * in_b_stride;
- }
-
- for(; vec_a < vec_a_end_addr;)
- {
- const qint16x4_t a0 = vld1_dup_qs16(vec_a);
-
- const qint16x4_t b00 = vld1_qs16(matrix_b + 0);
- const qint16x4_t b01 = vld1_qs16(matrix_b + 4);
- const qint16x4_t b02 = vld1_qs16(matrix_b + 8);
- const qint16x4_t b03 = vld1_qs16(matrix_b + 12);
-
- acc00_qs32 = vqmlal_qs16(acc00_qs32, b00, a0, fixed_point_position);
- acc01_qs32 = vqmlal_qs16(acc01_qs32, b01, a0, fixed_point_position);
- acc02_qs32 = vqmlal_qs16(acc02_qs32, b02, a0, fixed_point_position);
- acc03_qs32 = vqmlal_qs16(acc03_qs32, b03, a0, fixed_point_position);
-
- vec_a += 1;
- matrix_b += in_b_stride;
- }
-
- // Convert back to qint16x4_t and saturate
- qint16x4_t acc00_qs16 = vqmovn_qs32(acc00_qs32);
- qint16x4_t acc01_qs16 = vqmovn_qs32(acc01_qs32);
- qint16x4_t acc02_qs16 = vqmovn_qs32(acc02_qs32);
- qint16x4_t acc03_qs16 = vqmovn_qs32(acc03_qs32);
-
- // Multiply by the weight of the matrix product (alpha)
- if(multiply_alpha)
- {
- const qint16x4_t alpha_qs16 = vdup_n_qs16(sqcvt_qs16_f32(alpha, fixed_point_position));
- acc00_qs16 = vqmul_qs16(acc00_qs16, alpha_qs16, fixed_point_position);
- acc01_qs16 = vqmul_qs16(acc01_qs16, alpha_qs16, fixed_point_position);
- acc02_qs16 = vqmul_qs16(acc02_qs16, alpha_qs16, fixed_point_position);
- acc03_qs16 = vqmul_qs16(acc03_qs16, alpha_qs16, fixed_point_position);
- }
-
- const auto mtx_out0 = reinterpret_cast<qint16_t *>(out.ptr());
-
- // Store 16x4 output elements
- vst1_qs16(mtx_out0 + 0, acc00_qs16);
- vst1_qs16(mtx_out0 + 4, acc01_qs16);
- vst1_qs16(mtx_out0 + 8, acc02_qs16);
- vst1_qs16(mtx_out0 + 12, acc03_qs16);
- },
- ina, inb, out);
-}
-
-template <bool multiply_alpha>
void matrix_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, float alpha)
{
const size_t in_b_stride = input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type());
@@ -1063,361 +806,12 @@ void matrix_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
}
-template <bool multiply_alpha>
-void matrix_matrix_multiply_qs8(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, float alpha)
-{
- const size_t in_b_stride = input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type());
- const size_t out_stride1 = output->info()->strides_in_bytes()[1] / data_size_from_type(output->info()->data_type());
- const size_t out_stride2 = out_stride1 * 2;
- const size_t out_stride3 = out_stride1 * 3;
- const int num_elems_matrix_b_x = input1->info()->dimension(0);
- const int fixed_point_position = input0->info()->fixed_point_position();
- const qint8x8_t alpha_qs8 = vdup_n_qs8(sqcvt_qs8_f32(alpha, fixed_point_position));
- ARM_COMPUTE_UNUSED(alpha_qs8);
-
- // Set step_x and step_y for matrix A. Scale by a factor of 4 the Y range as the input interleaved matrix A has 4 times less the rows of the output matrix
- Window win_a(window);
- win_a.set(Window::DimX, Window::Dimension(0, 0, 0));
- win_a.set(Window::DimY, Window::Dimension(window.y().start() / 4, std::max(window.y().end() / 4, 1), 1));
-
- Window win_b;
- // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A more than 2
- // This scenario can happen when the the matrix multiplication is used to perform a convolution operation
- if(input1->info()->num_dimensions() >= 3)
- {
- win_b = window;
- }
- // Set step_x and step_y for matrix B. Scale by a factor of 16 the X range as the input transposed matrix A has 16 times less the cols of the output matrix
- // The step along the x direction is 2 times the in_b_stride because for each iteration we compute 2 blocks of size 16x4
- win_b.set(Window::DimX, Window::Dimension(window.x().start() / 16, window.x().end() / 16, 2 * in_b_stride));
- win_b.set(Window::DimY, Window::Dimension(0, 0, 0));
-
- Iterator ina(input0, win_a);
- Iterator inb(input1, win_b);
- Iterator out(output, window);
-
- // The implementation assumes that the matrix A and Matrix B have been reshaped respectively with NEGEMMInterleave4x4 and NEGEMMTranspose1xW
- // The reshaping of the matrices helps to have a cache friendly implementation and helps to avoid the data re-arrangements needed for computing 16x4 elements per iteration
- // All the values needed for computing a single 32x4 block will be read from consecutive memory positions
- execute_window_loop(window, [&](const Coordinates & id)
- {
- auto mtx_a0 = reinterpret_cast<const qint8_t *>(ina.ptr());
- auto mtx_b0 = reinterpret_cast<const qint8_t *>(inb.ptr());
- auto mtx_b1 = mtx_b0 + in_b_stride;
-
- qint16x8_t acc00_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc10_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc20_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc30_qs16 = vdupq_n_qs16(0);
-
- qint16x8_t acc01_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc11_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc21_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc31_qs16 = vdupq_n_qs16(0);
-
- qint16x8_t acc02_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc12_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc22_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc32_qs16 = vdupq_n_qs16(0);
-
- qint16x8_t acc03_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc13_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc23_qs16 = vdupq_n_qs16(0);
- qint16x8_t acc33_qs16 = vdupq_n_qs16(0);
-
- int k = 0;
- // This for loop performs 2 accumulations
- for(; k <= (num_elems_matrix_b_x - 32); k += 32)
- {
- const qint8x8_t a0 = vld1_dup_qs8(mtx_a0 + 0);
- const qint8x8_t a1 = vld1_dup_qs8(mtx_a0 + 1);
- const qint8x8_t a2 = vld1_dup_qs8(mtx_a0 + 2);
- const qint8x8_t a3 = vld1_dup_qs8(mtx_a0 + 3);
- const qint8x8_t a4 = vld1_dup_qs8(mtx_a0 + 4);
- const qint8x8_t a5 = vld1_dup_qs8(mtx_a0 + 5);
- const qint8x8_t a6 = vld1_dup_qs8(mtx_a0 + 6);
- const qint8x8_t a7 = vld1_dup_qs8(mtx_a0 + 7);
-
- const qint8x8_t b00 = vld1_qs8(mtx_b0 + 0);
- const qint8x8_t b01 = vld1_qs8(mtx_b0 + 8);
- const qint8x8_t b10 = vld1_qs8(mtx_b1 + 0);
- const qint8x8_t b11 = vld1_qs8(mtx_b1 + 8);
-
- // First accumulation
- acc00_qs16 = vqmlal_qs8(acc00_qs16, b00, a0, fixed_point_position);
- acc10_qs16 = vqmlal_qs8(acc10_qs16, b00, a1, fixed_point_position);
- acc20_qs16 = vqmlal_qs8(acc20_qs16, b00, a2, fixed_point_position);
- acc30_qs16 = vqmlal_qs8(acc30_qs16, b00, a3, fixed_point_position);
- acc02_qs16 = vqmlal_qs8(acc02_qs16, b10, a0, fixed_point_position);
- acc12_qs16 = vqmlal_qs8(acc12_qs16, b10, a1, fixed_point_position);
- acc22_qs16 = vqmlal_qs8(acc22_qs16, b10, a2, fixed_point_position);
- acc32_qs16 = vqmlal_qs8(acc32_qs16, b10, a3, fixed_point_position);
-
- const qint8x8_t b02 = vld1_qs8(mtx_b0 + 16);
- const qint8x8_t b03 = vld1_qs8(mtx_b0 + 24);
- const qint8x8_t b12 = vld1_qs8(mtx_b1 + 16);
- const qint8x8_t b13 = vld1_qs8(mtx_b1 + 24);
-
- acc01_qs16 = vqmlal_qs8(acc01_qs16, b01, a0, fixed_point_position);
- acc11_qs16 = vqmlal_qs8(acc11_qs16, b01, a1, fixed_point_position);
- acc21_qs16 = vqmlal_qs8(acc21_qs16, b01, a2, fixed_point_position);
- acc31_qs16 = vqmlal_qs8(acc31_qs16, b01, a3, fixed_point_position);
- acc03_qs16 = vqmlal_qs8(acc03_qs16, b11, a0, fixed_point_position);
- acc13_qs16 = vqmlal_qs8(acc13_qs16, b11, a1, fixed_point_position);
- acc23_qs16 = vqmlal_qs8(acc23_qs16, b11, a2, fixed_point_position);
- acc33_qs16 = vqmlal_qs8(acc33_qs16, b11, a3, fixed_point_position);
-
-#if __arm__
- asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_a0)));
- asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_b0)));
- asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_b1)));
-#endif /* __arm__ */
-
- // Second accumulation
- acc00_qs16 = vqmlal_qs8(acc00_qs16, b02, a4, fixed_point_position);
- acc10_qs16 = vqmlal_qs8(acc10_qs16, b02, a5, fixed_point_position);
- acc20_qs16 = vqmlal_qs8(acc20_qs16, b02, a6, fixed_point_position);
- acc30_qs16 = vqmlal_qs8(acc30_qs16, b02, a7, fixed_point_position);
- acc01_qs16 = vqmlal_qs8(acc01_qs16, b03, a4, fixed_point_position);
- acc11_qs16 = vqmlal_qs8(acc11_qs16, b03, a5, fixed_point_position);
- acc21_qs16 = vqmlal_qs8(acc21_qs16, b03, a6, fixed_point_position);
- acc31_qs16 = vqmlal_qs8(acc31_qs16, b03, a7, fixed_point_position);
- acc02_qs16 = vqmlal_qs8(acc02_qs16, b12, a4, fixed_point_position);
- acc12_qs16 = vqmlal_qs8(acc12_qs16, b12, a5, fixed_point_position);
- acc22_qs16 = vqmlal_qs8(acc22_qs16, b12, a6, fixed_point_position);
- acc32_qs16 = vqmlal_qs8(acc32_qs16, b12, a7, fixed_point_position);
- acc03_qs16 = vqmlal_qs8(acc03_qs16, b13, a4, fixed_point_position);
- acc13_qs16 = vqmlal_qs8(acc13_qs16, b13, a5, fixed_point_position);
- acc23_qs16 = vqmlal_qs8(acc23_qs16, b13, a6, fixed_point_position);
- acc33_qs16 = vqmlal_qs8(acc33_qs16, b13, a7, fixed_point_position);
-
- mtx_a0 += 8;
- mtx_b0 += 32;
- mtx_b1 += 32;
- }
-
- // This for loop performs the left over accumulations
- for(; k < num_elems_matrix_b_x; k += 16)
- {
- const qint8x8_t a0 = vld1_dup_qs8(mtx_a0 + 0);
- const qint8x8_t a1 = vld1_dup_qs8(mtx_a0 + 1);
- const qint8x8_t a2 = vld1_dup_qs8(mtx_a0 + 2);
- const qint8x8_t a3 = vld1_dup_qs8(mtx_a0 + 3);
-
- const qint8x8_t b00 = vld1_qs8(mtx_b0 + 0);
- const qint8x8_t b01 = vld1_qs8(mtx_b0 + 8);
- const qint8x8_t b10 = vld1_qs8(mtx_b1 + 0);
- const qint8x8_t b11 = vld1_qs8(mtx_b1 + 8);
-
- acc00_qs16 = vqmlal_qs8(acc00_qs16, b00, a0, fixed_point_position);
- acc10_qs16 = vqmlal_qs8(acc10_qs16, b00, a1, fixed_point_position);
- acc20_qs16 = vqmlal_qs8(acc20_qs16, b00, a2, fixed_point_position);
- acc30_qs16 = vqmlal_qs8(acc30_qs16, b00, a3, fixed_point_position);
- acc01_qs16 = vqmlal_qs8(acc01_qs16, b01, a0, fixed_point_position);
- acc11_qs16 = vqmlal_qs8(acc11_qs16, b01, a1, fixed_point_position);
- acc21_qs16 = vqmlal_qs8(acc21_qs16, b01, a2, fixed_point_position);
- acc31_qs16 = vqmlal_qs8(acc31_qs16, b01, a3, fixed_point_position);
- acc02_qs16 = vqmlal_qs8(acc02_qs16, b10, a0, fixed_point_position);
- acc12_qs16 = vqmlal_qs8(acc12_qs16, b10, a1, fixed_point_position);
- acc22_qs16 = vqmlal_qs8(acc22_qs16, b10, a2, fixed_point_position);
- acc32_qs16 = vqmlal_qs8(acc32_qs16, b10, a3, fixed_point_position);
- acc03_qs16 = vqmlal_qs8(acc03_qs16, b11, a0, fixed_point_position);
- acc13_qs16 = vqmlal_qs8(acc13_qs16, b11, a1, fixed_point_position);
- acc23_qs16 = vqmlal_qs8(acc23_qs16, b11, a2, fixed_point_position);
- acc33_qs16 = vqmlal_qs8(acc33_qs16, b11, a3, fixed_point_position);
-
- mtx_a0 += 4;
- mtx_b0 += 16;
- mtx_b1 += 16;
- }
-
- // Convert back to qint8x8_t and saturate
- qint8x8_t acc00_qs8 = vqmovn_qs16(acc00_qs16);
- qint8x8_t acc10_qs8 = vqmovn_qs16(acc10_qs16);
- qint8x8_t acc20_qs8 = vqmovn_qs16(acc20_qs16);
- qint8x8_t acc30_qs8 = vqmovn_qs16(acc30_qs16);
-
- qint8x8_t acc01_qs8 = vqmovn_qs16(acc01_qs16);
- qint8x8_t acc11_qs8 = vqmovn_qs16(acc11_qs16);
- qint8x8_t acc21_qs8 = vqmovn_qs16(acc21_qs16);
- qint8x8_t acc31_qs8 = vqmovn_qs16(acc31_qs16);
-
- qint8x8_t acc02_qs8 = vqmovn_qs16(acc02_qs16);
- qint8x8_t acc12_qs8 = vqmovn_qs16(acc12_qs16);
- qint8x8_t acc22_qs8 = vqmovn_qs16(acc22_qs16);
- qint8x8_t acc32_qs8 = vqmovn_qs16(acc32_qs16);
-
- qint8x8_t acc03_qs8 = vqmovn_qs16(acc03_qs16);
- qint8x8_t acc13_qs8 = vqmovn_qs16(acc13_qs16);
- qint8x8_t acc23_qs8 = vqmovn_qs16(acc23_qs16);
- qint8x8_t acc33_qs8 = vqmovn_qs16(acc33_qs16);
-
- // Multiply by the weight of the matrix product (alpha)
- if(multiply_alpha)
- {
- acc00_qs8 = vqmul_qs8(acc00_qs8, alpha_qs8, fixed_point_position);
- acc10_qs8 = vqmul_qs8(acc10_qs8, alpha_qs8, fixed_point_position);
- acc20_qs8 = vqmul_qs8(acc20_qs8, alpha_qs8, fixed_point_position);
- acc30_qs8 = vqmul_qs8(acc30_qs8, alpha_qs8, fixed_point_position);
- acc01_qs8 = vqmul_qs8(acc01_qs8, alpha_qs8, fixed_point_position);
- acc11_qs8 = vqmul_qs8(acc11_qs8, alpha_qs8, fixed_point_position);
- acc21_qs8 = vqmul_qs8(acc21_qs8, alpha_qs8, fixed_point_position);
- acc31_qs8 = vqmul_qs8(acc31_qs8, alpha_qs8, fixed_point_position);
- acc02_qs8 = vqmul_qs8(acc02_qs8, alpha_qs8, fixed_point_position);
- acc12_qs8 = vqmul_qs8(acc12_qs8, alpha_qs8, fixed_point_position);
- acc22_qs8 = vqmul_qs8(acc22_qs8, alpha_qs8, fixed_point_position);
- acc32_qs8 = vqmul_qs8(acc32_qs8, alpha_qs8, fixed_point_position);
- acc03_qs8 = vqmul_qs8(acc03_qs8, alpha_qs8, fixed_point_position);
- acc13_qs8 = vqmul_qs8(acc13_qs8, alpha_qs8, fixed_point_position);
- acc23_qs8 = vqmul_qs8(acc23_qs8, alpha_qs8, fixed_point_position);
- acc33_qs8 = vqmul_qs8(acc33_qs8, alpha_qs8, fixed_point_position);
- }
-
- const auto mtx_out0 = reinterpret_cast<qint8_t *>(out.ptr());
-
- // Store 32x4 output elements
- vst1_qs8(mtx_out0 + 0, acc00_qs8);
- vst1_qs8(mtx_out0 + 8, acc01_qs8);
- vst1_qs8(mtx_out0 + 16, acc02_qs8);
- vst1_qs8(mtx_out0 + 24, acc03_qs8);
- vst1_qs8(mtx_out0 + out_stride1 + 0, acc10_qs8);
- vst1_qs8(mtx_out0 + out_stride1 + 8, acc11_qs8);
- vst1_qs8(mtx_out0 + out_stride1 + 16, acc12_qs8);
- vst1_qs8(mtx_out0 + out_stride1 + 24, acc13_qs8);
- vst1_qs8(mtx_out0 + out_stride2 + 0, acc20_qs8);
- vst1_qs8(mtx_out0 + out_stride2 + 8, acc21_qs8);
- vst1_qs8(mtx_out0 + out_stride2 + 16, acc22_qs8);
- vst1_qs8(mtx_out0 + out_stride2 + 24, acc23_qs8);
- vst1_qs8(mtx_out0 + out_stride3 + 0, acc30_qs8);
- vst1_qs8(mtx_out0 + out_stride3 + 8, acc31_qs8);
- vst1_qs8(mtx_out0 + out_stride3 + 16, acc32_qs8);
- vst1_qs8(mtx_out0 + out_stride3 + 24, acc33_qs8);
- },
- ina, inb, out);
-}
-
-template <bool multiply_alpha>
-void matrix_matrix_multiply_qs16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, float alpha)
-{
- const size_t in_b_stride = input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type());
- const size_t out_stride1 = output->info()->strides_in_bytes()[1] / data_size_from_type(output->info()->data_type());
- const size_t out_stride2 = out_stride1 * 2;
- const size_t out_stride3 = out_stride1 * 3;
- const int num_elems_matrix_b_x = input1->info()->dimension(0);
- const int fixed_point_position = input0->info()->fixed_point_position();
- const qint16x4_t alpha_qs16 = vdup_n_qs16(sqcvt_qs16_f32(alpha, fixed_point_position));
- ARM_COMPUTE_UNUSED(alpha_qs16);
-
- // Set step_x and step_y for matrix A. Scale by a factor of 4 the Y range as the input interleaved matrix A has 4 times less the rows of the output matrix
- Window win_a(window);
- win_a.set(Window::DimX, Window::Dimension(0, 0, 0));
- win_a.set(Window::DimY, Window::Dimension(window.y().start() / 4, std::max(window.y().end() / 4, 1), 1));
-
- Window win_b;
- // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A more than 2
- // This scenario can happen when the the matrix multiplication is used to perform a convolution operation
- if(input1->info()->num_dimensions() >= 3)
- {
- win_b = window;
- }
- // Set step_x and step_y for matrix B. Scale by a factor of 16 the X range as the input transposed matrix A has 16 times less the cols of the output matrix
- win_b.set(Window::DimX, Window::Dimension(window.x().start() / 8, window.x().end() / 8, in_b_stride));
- win_b.set(Window::DimY, Window::Dimension(0, 0, 0));
-
- Iterator ina(input0, win_a);
- Iterator inb(input1, win_b);
- Iterator out(output, window);
-
- // The implementation assumes that the matrix A and Matrix B have been reshaped respectively with NEGEMMInterleave4x4 and NEGEMMTranspose1xW
- // The reshaping of the matrices helps to have a cache friendly implementation and helps to avoid the data re-arrangements needed for computing 8x4 elements per iteration
- // All the values needed for computing a single 8x4 block will be read from consecutive memory positions
- execute_window_loop(window, [&](const Coordinates & id)
- {
- auto mtx_a0 = reinterpret_cast<const qint16_t *>(ina.ptr());
- auto mtx_b0 = reinterpret_cast<const qint16_t *>(inb.ptr());
- auto mtx_b1 = mtx_b0 + in_b_stride;
-
- qint32x4_t acc00_qs32 = vdupq_n_qs32(0);
- qint32x4_t acc10_qs32 = vdupq_n_qs32(0);
- qint32x4_t acc20_qs32 = vdupq_n_qs32(0);
- qint32x4_t acc30_qs32 = vdupq_n_qs32(0);
-
- qint32x4_t acc01_qs32 = vdupq_n_qs32(0);
- qint32x4_t acc11_qs32 = vdupq_n_qs32(0);
- qint32x4_t acc21_qs32 = vdupq_n_qs32(0);
- qint32x4_t acc31_qs32 = vdupq_n_qs32(0);
-
- // This for loop performs 1 accumulation
- for(int k = 0; k <= (num_elems_matrix_b_x - 8); k += 8)
- {
- const qint16x4_t a0 = vld1_dup_qs16(mtx_a0 + 0);
- const qint16x4_t a1 = vld1_dup_qs16(mtx_a0 + 1);
- const qint16x4_t a2 = vld1_dup_qs16(mtx_a0 + 2);
- const qint16x4_t a3 = vld1_dup_qs16(mtx_a0 + 3);
-
- const qint16x4_t b00 = vld1_qs16(mtx_b0 + 0);
- const qint16x4_t b01 = vld1_qs16(mtx_b0 + 4);
-
- acc00_qs32 = vqmlal_qs16(acc00_qs32, b00, a0, fixed_point_position);
- acc10_qs32 = vqmlal_qs16(acc10_qs32, b00, a1, fixed_point_position);
- acc20_qs32 = vqmlal_qs16(acc20_qs32, b00, a2, fixed_point_position);
- acc30_qs32 = vqmlal_qs16(acc30_qs32, b00, a3, fixed_point_position);
- acc01_qs32 = vqmlal_qs16(acc01_qs32, b01, a0, fixed_point_position);
- acc11_qs32 = vqmlal_qs16(acc11_qs32, b01, a1, fixed_point_position);
- acc21_qs32 = vqmlal_qs16(acc21_qs32, b01, a2, fixed_point_position);
- acc31_qs32 = vqmlal_qs16(acc31_qs32, b01, a3, fixed_point_position);
-
- mtx_a0 += 4;
- mtx_b0 += 8;
- mtx_b1 += 8;
- }
-
- // Convert back to qint16x4_t and saturate
- qint16x4_t acc00_qs16 = vqmovn_qs32(acc00_qs32);
- qint16x4_t acc10_qs16 = vqmovn_qs32(acc10_qs32);
- qint16x4_t acc20_qs16 = vqmovn_qs32(acc20_qs32);
- qint16x4_t acc30_qs16 = vqmovn_qs32(acc30_qs32);
-
- qint16x4_t acc01_qs16 = vqmovn_qs32(acc01_qs32);
- qint16x4_t acc11_qs16 = vqmovn_qs32(acc11_qs32);
- qint16x4_t acc21_qs16 = vqmovn_qs32(acc21_qs32);
- qint16x4_t acc31_qs16 = vqmovn_qs32(acc31_qs32);
-
- // Multiply by the weight of the matrix product (alpha)
- if(multiply_alpha)
- {
- acc00_qs16 = vqmul_qs16(acc00_qs16, alpha_qs16, fixed_point_position);
- acc10_qs16 = vqmul_qs16(acc10_qs16, alpha_qs16, fixed_point_position);
- acc20_qs16 = vqmul_qs16(acc20_qs16, alpha_qs16, fixed_point_position);
- acc30_qs16 = vqmul_qs16(acc30_qs16, alpha_qs16, fixed_point_position);
- acc01_qs16 = vqmul_qs16(acc01_qs16, alpha_qs16, fixed_point_position);
- acc11_qs16 = vqmul_qs16(acc11_qs16, alpha_qs16, fixed_point_position);
- acc21_qs16 = vqmul_qs16(acc21_qs16, alpha_qs16, fixed_point_position);
- acc31_qs16 = vqmul_qs16(acc31_qs16, alpha_qs16, fixed_point_position);
- }
-
- const auto mtx_out0 = reinterpret_cast<qint16_t *>(out.ptr());
-
- // Store 8x4 output elements
- vst1_qs16(mtx_out0 + 0, acc00_qs16);
- vst1_qs16(mtx_out0 + 4, acc01_qs16);
- vst1_qs16(mtx_out0 + out_stride1 + 0, acc10_qs16);
- vst1_qs16(mtx_out0 + out_stride1 + 4, acc11_qs16);
- vst1_qs16(mtx_out0 + out_stride2 + 0, acc20_qs16);
- vst1_qs16(mtx_out0 + out_stride2 + 4, acc21_qs16);
- vst1_qs16(mtx_out0 + out_stride3 + 0, acc30_qs16);
- vst1_qs16(mtx_out0 + out_stride3 + 4, acc31_qs16);
- },
- ina, inb, out);
-}
-
inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info)
{
ARM_COMPUTE_UNUSED(alpha);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32, DataType::QS8, DataType::QS16);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, input1, output);
if(!is_interleaved)
{
@@ -1428,7 +822,6 @@ inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *i
ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != output->dimension(0));
ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != output->dimension(1));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, output);
}
}
else
@@ -1467,7 +860,6 @@ inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *i
}
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != static_cast<size_t>(m));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, output);
}
}
@@ -1492,16 +884,6 @@ inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *inpu
num_elems_processed_per_iteration_x = 16;
break;
}
- case DataType::QS8:
- {
- num_elems_processed_per_iteration_x = 32;
- break;
- }
- case DataType::QS16:
- {
- num_elems_processed_per_iteration_x = 16;
- break;
- }
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
@@ -1539,16 +921,6 @@ inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *inpu
num_elems_processed_per_iteration_x = 8;
break;
}
- case DataType::QS8:
- {
- num_elems_processed_per_iteration_x = 32;
- break;
- }
- case DataType::QS16:
- {
- num_elems_processed_per_iteration_x = 8;
- break;
- }
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
@@ -1638,18 +1010,6 @@ void NEGEMMMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &inf
vector_matrix_multiply_f32<false>(_input0, _input1, _output, window, info, _alpha);
break;
}
- case DataType::QS8:
- {
- multiply_alpha ? vector_matrix_multiply_qs8<true>(_input0, _input1, _output, window, info, _alpha) :
- vector_matrix_multiply_qs8<false>(_input0, _input1, _output, window, info, _alpha);
- break;
- }
- case DataType::QS16:
- {
- multiply_alpha ? vector_matrix_multiply_qs16<true>(_input0, _input1, _output, window, info, _alpha) :
- vector_matrix_multiply_qs16<false>(_input0, _input1, _output, window, info, _alpha);
- break;
- }
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
@@ -1675,18 +1035,6 @@ void NEGEMMMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &inf
matrix_matrix_multiply_f32<false>(_input0, _input1, _output, window, _alpha);
break;
}
- case DataType::QS8:
- {
- multiply_alpha ? matrix_matrix_multiply_qs8<true>(_input0, _input1, _output, window, _alpha) :
- matrix_matrix_multiply_qs8<false>(_input0, _input1, _output, window, _alpha);
- break;
- }
- case DataType::QS16:
- {
- multiply_alpha ? matrix_matrix_multiply_qs16<true>(_input0, _input1, _output, window, _alpha) :
- matrix_matrix_multiply_qs16<false>(_input0, _input1, _output, window, _alpha);
- break;
- }
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
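The removed vector_matrix_multiply_qs8/qs16 bodies also spell out the thread partitioning along X. The arithmetic below reproduces that bound computation with concrete, purely illustrative numbers, since the ceil_to_multiple adjustment is easy to misread; the removed QS8 path used a 32-element step, and a separate id.x() > width_matrix_b guard in the loop body handles ranges that overshoot the matrix width.

    #include <cstdio>

    int main()
    {
        const int width_matrix_b = 100, thread_id = 1, num_threads = 3, step = 32;
        const int window_start_x = step * thread_id;    // 32
        const int window_step_x  = step * num_threads;  // 96
        // ceil_to_multiple(width_matrix_b - window_start_x, window_step_x) + window_start_x
        const int span         = width_matrix_b - window_start_x;                                  // 68
        const int window_end_x = ((span + window_step_x - 1) / window_step_x) * window_step_x
                                 + window_start_x;                                                  // 96 + 32 = 128
        std::printf("thread 1 covers x in [%d, %d) with step %d\n",
                    window_start_x, window_end_x, window_step_x);                                   // -> [32, 128) step 96
        return 0;
    }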
diff --git a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp
index c1e975e77e..8588f43edf 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp
@@ -177,7 +177,6 @@ void NEGEMMMatrixVectorMultiplyKernel::configure(const ITensor *input0, const IT
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::QASYMM8, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input0, input1, output);
ARM_COMPUTE_ERROR_ON(is_data_type_quantized_asymmetric(input0->info()->data_type()) && (output->info()->data_type() != DataType::S32));
ARM_COMPUTE_ERROR_ON(input0->info()->dimension(2) != input1->info()->dimension(1));
diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp
index 5d6163d583..4517f46139 100644
--- a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2018 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,17 +54,15 @@ TensorShape get_output_shape(const ITensorInfo *input)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::U8, DataType::S8,
- DataType::QS16, DataType::U16, DataType::S16, DataType::U32, DataType::S32,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::U8, DataType::S8,
+ DataType::U16, DataType::S16, DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), get_output_shape(input));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
@@ -102,7 +100,7 @@ void NEGEMMTranspose1xWKernel::configure(const ITensor *input, ITensor *output)
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), get_output_shape(input->info()), 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), get_output_shape(input->info()), 1, input->info()->data_type());
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 86e3fd7a84..f03bc49ed3 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -24,7 +24,6 @@
#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Size2D.h"
@@ -47,9 +46,8 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias);
ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
@@ -90,7 +88,6 @@ inline void linearize_volume(const uint8_t *const in_ptr,
int input_stride_x,
int input_stride_y,
int input_stride_z,
- int fixed_point_position,
int pad_value,
int dilation_x,
int dilation_y)
@@ -171,18 +168,7 @@ inline void linearize_volume(const uint8_t *const in_ptr,
// Append 1 if the convolution layer has biases
if(has_bias)
{
- if(std::is_same<T, qint8_t>::value)
- {
- *out_ptr = sqcvt_qs8_f32(1.0f, fixed_point_position);
- }
- else if(std::is_same<T, qint16_t>::value)
- {
- *out_ptr = sqcvt_qs16_f32(1.0f, fixed_point_position);
- }
- else
- {
- *out_ptr = static_cast<T>(1);
- }
+ *out_ptr = static_cast<T>(1);
}
}
} // namespace
@@ -251,7 +237,6 @@ void NEIm2ColKernel::run_generic(const Window &window)
input_stride_x,
input_stride_y,
input_stride_z,
- _input->info()->fixed_point_position(),
offset,
_dilation.x(),
_dilation.y());
@@ -294,18 +279,7 @@ void NEIm2ColKernel::run_reduced(const Window &window)
// Add bias
if(_has_bias)
{
- if(std::is_same<T, qint8_t>::value)
- {
- *(reinterpret_cast<T *>(out_ptr) + out_width - 1) = sqcvt_qs8_f32(1.0f, _input->info()->fixed_point_position());
- }
- else if(std::is_same<T, qint16_t>::value)
- {
- *(reinterpret_cast<T *>(out_ptr) + out_width - 1) = sqcvt_qs16_f32(1.0f, _input->info()->fixed_point_position());
- }
- else
- {
- *(reinterpret_cast<T *>(out_ptr) + out_width - 1) = static_cast<T>(1);
- }
+ *(reinterpret_cast<T *>(out_ptr) + out_width - 1) = static_cast<T>(1);
}
}
while(in_window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice));
@@ -366,12 +340,6 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
_func = &NEIm2ColKernel::run_reduced<float16_t>;
break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- case DataType::QS8:
- _func = &NEIm2ColKernel::run_reduced<qint8_t>;
- break;
- case DataType::QS16:
- _func = &NEIm2ColKernel::run_reduced<qint16_t>;
- break;
case DataType::QASYMM8:
_func = &NEIm2ColKernel::run_reduced<qasymm8_t>;
break;
@@ -392,12 +360,6 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
_func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<float16_t, false> : &NEIm2ColKernel::run_generic<float16_t, true>;
break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- case DataType::QS8:
- _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<qint8_t, false> : &NEIm2ColKernel::run_generic<qint8_t, true>;
- break;
- case DataType::QS16:
- _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<qint16_t, false> : &NEIm2ColKernel::run_generic<qint16_t, true>;
- break;
case DataType::QASYMM8:
_func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<qasymm8_t, false> : &NEIm2ColKernel::run_generic<qasymm8_t, true>;
break;
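
For context on the im2col simplification above: with QS8/QS16 support, the appended bias value had to be written in the tensor's fixed-point format, whereas every remaining data type can simply take a plain 1. The following is a minimal scalar sketch of what the removed sqcvt_qs8_f32(1.0f, fixed_point_position) conversion amounted to, assuming the usual round-and-saturate Qn.m encoding rather than quoting the exact library implementation (the helper name below is illustrative only):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Hedged sketch: encode a float into 8-bit fixed point with 'fp' fractional bits,
// i.e. round(a * 2^fp) saturated to the int8_t range. For a = 1.0f this yields
// roughly 1 << fp, which is why the fixed-point branches existed; without them a
// plain static_cast<T>(1) covers every remaining data type.
int8_t to_qs8_sketch(float a, int fp)
{
    const float scaled = std::round(a * static_cast<float>(1 << fp));
    return static_cast<int8_t>(std::min(127.0f, std::max(-128.0f, scaled)));
}
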
diff --git a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
index 91776d8100..ed037832af 100644
--- a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
@@ -103,7 +103,7 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
// Output auto initialization if not yet initialized
- auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type(), input->fixed_point_position());
+ auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type());
AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
AccessWindowHorizontal sum_access(sum, 0, num_elems_processed_per_iteration_sum);
diff --git a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp
index 434f4eb3e9..d93dc09ff9 100644
--- a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp
@@ -68,7 +68,7 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
TensorShape output_shape = compute_min_max_shape(input);
// Output auto initialization if not yet initialized
- auto_init_if_empty(*output, output_shape, 1, input->data_type(), input->fixed_point_position());
+ auto_init_if_empty(*output, output_shape, 1, input->data_type());
constexpr unsigned int num_elems_processed_per_iteration = 1;
diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
index 776cb27d7a..253a93f196 100644
--- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,26 +39,17 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, const NormalizationLayerInfo &norm_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_squared, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_squared);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, input_squared);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
- if(is_data_type_fixed_point(input->data_type()))
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, input_squared);
- ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.beta(), input);
- ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.kappa(), input);
- ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.scale_coeff(), input);
- }
-
// Checks performed when output is configured
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
@@ -162,44 +153,6 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
}
break;
}
- case DataType::QS8:
- {
- switch(norm_info.type())
- {
- case NormType::IN_MAP_1D:
- _func = &NENormalizationLayerKernel::normalize_fixed_point<DataType::QS8, 0, false>;
- break;
- case NormType::IN_MAP_2D:
- // Normalize over X and Y
- _func = &NENormalizationLayerKernel::normalize_fixed_point<DataType::QS8, 0, true>;
- break;
- case NormType::CROSS_MAP:
- _func = &NENormalizationLayerKernel::normalize_fixed_point<DataType::QS8, 2, false>;
- break;
- default:
- break;
- }
- break;
- }
- case DataType::QS16:
- {
- switch(norm_info.type())
- {
- case NormType::IN_MAP_1D:
- _func = &NENormalizationLayerKernel::normalize_fixed_point<DataType::QS16, 0, false>;
- break;
- case NormType::IN_MAP_2D:
- // Normalize over X and Y
- _func = &NENormalizationLayerKernel::normalize_fixed_point<DataType::QS16, 0, true>;
- break;
- case NormType::CROSS_MAP:
- _func = &NENormalizationLayerKernel::normalize_fixed_point<DataType::QS16, 2, false>;
- break;
- default:
- break;
- }
- break;
- }
default:
ARM_COMPUTE_ERROR("NOT SUPPORTED!");
}
@@ -306,105 +259,6 @@ void NENormalizationLayerKernel::normalize_float(const Window &window)
}
}
-template <DataType dt, unsigned int dim, bool do_2D_norm>
-void NENormalizationLayerKernel::normalize_fixed_point(const Window &window)
-{
- Iterator input(_input, window);
- Iterator input_squared(_input_squared, window);
- Iterator output(_output, window);
-
- const int dim_y = 1;
- const int radius = _norm_info.norm_size() / 2;
- const int total_size = _input->info()->dimension(dim) - 1;
- const int input_squared_stride = _input_squared->info()->strides_in_bytes()[dim];
- // We account padding across X only and we iterate over rows
- const int min_left = (dim == 2) ? 0 : -static_cast<int>(border_size().left);
- const int max_right = (dim == 2) ? total_size : total_size + border_size().left;
- const int min_top = 0;
- const int max_bottom = _input->info()->dimension(dim_y) - 1;
-
- const int fixed_point_position = _input->info()->fixed_point_position();
-
- if(dt == DataType::QS8)
- {
- const qint8x16_t coeff_vec = vdupq_n_qs8_f32(_norm_info.scale_coeff(), fixed_point_position);
- const qint8x16_t beta_vec = vdupq_n_qs8_f32(_norm_info.beta(), fixed_point_position);
- const qint8x16_t kappa_vec = vdupq_n_qs8_f32(_norm_info.kappa(), fixed_point_position);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- // Get range to normalize
- const int current_row = do_2D_norm ? id[dim_y] : 0;
- const int current_slice = id[dim];
- const int first_row = do_2D_norm ? std::max(current_row - radius, min_top) : 0;
- const int last_row = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0;
- const int first_slice = std::max(current_slice - radius, min_left);
- const int last_slice = std::min(current_slice + radius, max_right);
-
- // Accumulate 2D In-Map values
- qint8x16_t accu = vdupq_n_qs8(0);
- for(int j = first_row; j <= last_row; ++j)
- {
- // Compute row displacement
- const int row = (j - current_row) * _input_squared->info()->strides_in_bytes()[dim_y];
- const uint8_t *const input_squared_ptr = input_squared.ptr() + row - (current_slice * input_squared_stride);
- for(int i = first_slice; i <= last_slice; ++i)
- {
- accu = vqaddq_qs8(accu, vld1q_qs8(reinterpret_cast<const qint8_t *>(input_squared_ptr + i * input_squared_stride)));
- }
- }
-
- // Normalize
- const qint8x16_t accu_scale = vqmlaq_qs8(kappa_vec, coeff_vec, accu, fixed_point_position);
- const qint8x16_t normalized = vqpowq_qs8(accu_scale, beta_vec, fixed_point_position);
- const qint8x16_t normalized_pixel = vdivq_qs8(vld1q_qs8(reinterpret_cast<const qint8_t *>(input.ptr())), normalized, fixed_point_position);
- vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), normalized_pixel);
- },
- input, input_squared, output);
- }
- else if(dt == DataType::QS16)
- {
- const qint16x8_t coeff_vec = vdupq_n_qs16_f32(_norm_info.scale_coeff(), fixed_point_position);
- const qint16x8_t beta_vec = vdupq_n_qs16_f32(_norm_info.beta(), fixed_point_position);
- const qint16x8_t kappa_vec = vdupq_n_qs16_f32(_norm_info.kappa(), fixed_point_position);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- // Get range to normalize
- const int current_row = do_2D_norm ? id[dim_y] : 0;
- const int current_slice = id[dim];
- const int first_row = do_2D_norm ? std::max(current_row - radius, min_top) : 0;
- const int last_row = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0;
- const int first_slice = std::max(current_slice - radius, min_left);
- const int last_slice = std::min(current_slice + radius, max_right);
-
- // Accumulate 2D In-Map values
- qint16x8_t accu = vdupq_n_qs16(0);
- for(int j = first_row; j <= last_row; ++j)
- {
- // Compute row displacement
- const int row = (j - current_row) * _input_squared->info()->strides_in_bytes()[dim_y];
- const uint8_t *const input_squared_ptr = input_squared.ptr() + row - (current_slice * input_squared_stride);
- for(int i = first_slice; i <= last_slice; ++i)
- {
- accu = vqaddq_qs16(accu, vld1q_qs16(reinterpret_cast<const qint16_t *>(input_squared_ptr + i * input_squared_stride)));
- }
- }
-
- // Normalize
- const qint16x8_t accu_scale = vqmlaq_qs16(kappa_vec, coeff_vec, accu, fixed_point_position);
- const qint16x8_t normalized = vqpowq_qs16(accu_scale, beta_vec, fixed_point_position);
- const qint16x8_t normalized_pixel = vdivq_qs16(vld1q_qs16(reinterpret_cast<const qint16_t *>(input.ptr())), normalized, fixed_point_position);
- vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), normalized_pixel);
- },
- input, input_squared, output);
- }
- else
- {
- ARM_COMPUTE_ERROR("Not supported");
- }
-}
-
Status NENormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, const NormalizationLayerInfo norm_info)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, input_squared, output, norm_info));
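
The deleted normalize_fixed_point specialisations computed the same in-map/cross-map normalization as the surviving float path, only in saturating Qn.m arithmetic: the squared-input window is accumulated, scaled, offset by kappa, raised to beta, and the input element is divided by the result. A per-element scalar sketch of that formula, with illustrative names rather than the library API:

#include <cmath>

// Hedged sketch of the per-element LRN computation mirrored by the removed
// QS8/QS16 kernels: 'accu' is the sum of squared inputs over the normalization
// window; scale_coeff, beta and kappa come from NormalizationLayerInfo.
float normalize_element_sketch(float in, float accu, float scale_coeff, float beta, float kappa)
{
    const float denom = std::pow(kappa + scale_coeff * accu, beta); // accu_scale, then pow
    return in / denom;                                              // normalized pixel
}
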
diff --git a/src/core/NEON/kernels/NEPermuteKernel.cpp b/src/core/NEON/kernels/NEPermuteKernel.cpp
index ae1d48cc69..e9bc8effc6 100644
--- a/src/core/NEON/kernels/NEPermuteKernel.cpp
+++ b/src/core/NEON/kernels/NEPermuteKernel.cpp
@@ -45,8 +45,8 @@ namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8,
- DataType::U16, DataType::S16, DataType::QS16,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8,
+ DataType::U16, DataType::S16,
DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((perm.num_dimensions() == 3 && !(perm[0] == 2 && perm[1] == 0 && perm[2] == 1) && !(perm[0] == 1 && perm[1] == 2 && perm[2] == 0)),
@@ -59,7 +59,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index 193ca3799c..0ec7e823a1 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -61,9 +61,9 @@ inline Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *i
ARM_COMPUTE_UNUSED(overflow_policy);
ARM_COMPUTE_UNUSED(rounding_policy);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::U8 && (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8),
"Output can only be U8 if both inputs are U8");
@@ -71,14 +71,6 @@ inline Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *i
ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), "Wrong shape for output");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
- if(is_data_type_fixed_point(input1->data_type()) || is_data_type_fixed_point(input2->data_type()) || is_data_type_fixed_point(output->data_type()))
- {
- // Check that all data types are the same and all fixed-point positions are the same
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input1, input2, output);
- // Check if scale is representable in fixed-point with the provided settings
- ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(scale, input1);
- }
-
if(std::abs(scale - scale255_constant) < 0.00001f)
{
ARM_COMPUTE_RETURN_ERROR_ON(rounding_policy != RoundingPolicy::TO_NEAREST_UP && rounding_policy != RoundingPolicy::TO_NEAREST_EVEN);
@@ -120,11 +112,6 @@ inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *inpu
{
set_format_if_unknown(*output, Format::F16);
}
- else if(input1->data_type() == DataType::QS8 && input2->data_type() == DataType::QS8)
- {
- set_data_type_if_unknown(*output, DataType::QS8);
- set_fixed_point_position_if_zero(*output, input1->fixed_point_position());
- }
}
// Configure kernel window
@@ -220,105 +207,6 @@ void mul_U8_U8_U8_n(const void *__restrict input1_ptr, const void *__restrict in
}
template <bool is_scale255, bool is_sat>
-void mul_QS8_QS8_QS8_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int n, int fixed_point_position)
-{
- const auto output = static_cast<qint8_t *__restrict>(output_ptr);
-
- const qint8x16_t ta1 = vld1q_qs8(static_cast<const qint8_t *__restrict>(input1_ptr));
- const qint8x16_t ta2 = vld1q_qs8(static_cast<const qint8_t *__restrict>(input2_ptr));
-
- if(is_scale255)
- {
- qint16x8_t tmp1_high = vmovl_s8(vget_high_s8(ta1));
- qint16x8_t tmp1_low = vmovl_s8(vget_low_s8(ta1));
- const qint16x8_t tmp2_high = vmovl_s8(vget_high_s8(ta2));
- const qint16x8_t tmp2_low = vmovl_s8(vget_low_s8(ta2));
-
- const float32x4x2_t scale255_f32 =
- {
- {
- scale255_constant_f32q,
- scale255_constant_f32q
- }
- };
- const qint16x8_t scale255 = vqcvtq_qs16_f32(scale255_f32, fixed_point_position);
-
- tmp1_high = vmulq_qs16(tmp1_high, tmp2_high, fixed_point_position);
- tmp1_low = vmulq_qs16(tmp1_low, tmp2_low, fixed_point_position);
- tmp1_high = vmulq_qs16(tmp1_high, scale255, fixed_point_position);
- tmp1_low = vmulq_qs16(tmp1_low, scale255, fixed_point_position);
-
- if(is_sat)
- {
- vst1q_qs8(output, vcombine_s8(vqmovn_s16(tmp1_low), vqmovn_s16(tmp1_high)));
- }
- else
- {
- vst1q_qs8(output, vcombine_s8(vmovn_s16(tmp1_low), vmovn_s16(tmp1_high)));
- }
- }
- else
- {
- const qint8x16_t vn = vdupq_n_s8(-n);
- qint8x16_t res = ta2;
-
- if(is_sat)
- {
- res = vqshlq_s8(vqmulq_qs8(ta1, res, fixed_point_position), vn);
- }
- else
- {
- res = vshlq_s8(vmulq_qs8(ta1, res, fixed_point_position), vn);
- }
- vst1q_qs8(output, res);
- }
-}
-
-template <bool is_scale255, bool is_sat>
-void mul_QS16_QS16_QS16_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int n, int fixed_point_position)
-{
- const qint16x8x2_t ta1 = vld2q_qs16(static_cast<const qint16_t *__restrict>(input1_ptr));
- qint16x8x2_t res = vld2q_qs16(static_cast<const qint16_t *__restrict>(input2_ptr));
-
- if(is_scale255)
- {
- const float32x4x2_t scale255_f32 =
- {
- {
- scale255_constant_f32q,
- scale255_constant_f32q
- }
- };
- const qint16x8_t scale255 = vqcvtq_qs16_f32(scale255_f32, fixed_point_position);
- if(is_sat)
- {
- res.val[0] = vqmulq_qs16(vqmulq_qs16(ta1.val[0], res.val[0], fixed_point_position), scale255, fixed_point_position);
- res.val[1] = vqmulq_qs16(vqmulq_qs16(ta1.val[1], res.val[1], fixed_point_position), scale255, fixed_point_position);
- }
- else
- {
- res.val[0] = vmulq_qs16(vmulq_qs16(ta1.val[0], res.val[0], fixed_point_position), scale255, fixed_point_position);
- res.val[1] = vmulq_qs16(vmulq_qs16(ta1.val[1], res.val[1], fixed_point_position), scale255, fixed_point_position);
- }
- }
- else
- {
- const qint16x8_t vn = vdupq_n_s16(-n);
- if(is_sat)
- {
- res.val[0] = vqshlq_s16(vqmulq_qs16(ta1.val[0], res.val[0], fixed_point_position), vn);
- res.val[1] = vqshlq_s16(vqmulq_qs16(ta1.val[1], res.val[1], fixed_point_position), vn);
- }
- else
- {
- res.val[0] = vshlq_s16(vmulq_qs16(ta1.val[0], res.val[0], fixed_point_position), vn);
- res.val[1] = vshlq_s16(vmulq_qs16(ta1.val[1], res.val[1], fixed_point_position), vn);
- }
- }
- vst2q_s16(static_cast<qint16_t *__restrict>(output_ptr), res);
-}
-
-template <bool is_scale255, bool is_sat>
inline int16x8_t mul_S16_S16_S16_n_loop(const int16x8_t &input1, const int16x8_t &input2, int n)
{
int32x4_t tmp1_high = vmovl_s16(vget_high_s16(input1));
@@ -529,7 +417,7 @@ void mul_U8_S16_S16_n(const void *__restrict input1_ptr, const void *__restrict
} // namespace
NEPixelWiseMultiplicationKernel::NEPixelWiseMultiplicationKernel()
- : _func_float(nullptr), _func_int(nullptr), _func_q_int(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _scale{ 0 }, _scale_exponent{ 0 }
+ : _func_float(nullptr), _func_int(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _scale{ 0 }, _scale_exponent{ 0 }
{
}
@@ -550,7 +438,6 @@ void NEPixelWiseMultiplicationKernel::configure(const ITensor *input1, const ITe
_scale = scale;
_scale_exponent = 0;
_func_int = nullptr;
- _func_q_int = nullptr;
_func_float = nullptr;
bool is_scale_255 = false;
@@ -630,28 +517,6 @@ void NEPixelWiseMultiplicationKernel::configure(const ITensor *input1, const ITe
_func_int = is_sat ? &mul_U8_U8_S16_n<false, true> : &mul_U8_U8_S16_n<false, false>;
}
}
- else if(DataType::QS8 == dt_input1 && DataType::QS8 == dt_input2 && DataType::QS8 == dt_output)
- {
- if(is_scale_255)
- {
- _func_q_int = is_sat ? &mul_QS8_QS8_QS8_n<true, true> : &mul_QS8_QS8_QS8_n<true, false>;
- }
- else
- {
- _func_q_int = is_sat ? &mul_QS8_QS8_QS8_n<false, true> : &mul_QS8_QS8_QS8_n<false, false>;
- }
- }
- else if(DataType::QS16 == dt_input1 && DataType::QS16 == dt_input2 && DataType::QS16 == dt_output)
- {
- if(is_scale_255)
- {
- _func_q_int = is_sat ? &mul_QS16_QS16_QS16_n<true, true> : &mul_QS16_QS16_QS16_n<true, false>;
- }
- else
- {
- _func_q_int = is_sat ? &mul_QS16_QS16_QS16_n<false, true> : &mul_QS16_QS16_QS16_n<false, false>;
- }
- }
else if(DataType::F16 == dt_input1 && DataType::F16 == dt_input2 && DataType::F16 == dt_output)
{
_func_float = &mul_F16_F16_F16_n<false, false>;
@@ -724,17 +589,6 @@ void NEPixelWiseMultiplicationKernel::run(const Window &window, const ThreadInfo
},
input1, input2, output);
}
- else if(_func_q_int != nullptr)
- {
- int fixed_point_position = _input1->info()->fixed_point_position();
- execute_window_loop(collapsed, [&](const Coordinates & id)
- {
- (*_func_q_int)(input1.ptr(), input2.ptr(), output.ptr(), _scale_exponent, fixed_point_position);
- collapsed.slide_window_slice_3D(slice_input1);
- collapsed.slide_window_slice_3D(slice_input2);
- },
- input1, input2, output);
- }
else
{
ARM_COMPUTE_ERROR_ON(_func_float == nullptr);
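
For reference, the removed _func_q_int paths implemented the same element-wise multiply as the surviving integer and float paths, but in saturating fixed point: ignoring rounding details, the non-scale255 branch amounts to a fixed-point product followed by a right shift by the scale exponent n (with scale = 1 / 2^n). A scalar sketch under those assumptions, not a transcription of the deleted NEON code:

#include <algorithm>
#include <cstdint>

// Hedged sketch of the removed QS8 * QS8 -> QS8 multiply with a power-of-two
// scale: multiply the two Qx.fp operands, shift the product back by fp to stay
// in Qx.fp, then apply scale = 2^-n. Saturation mirrors the is_sat variant;
// rounding behaviour is approximated.
int8_t mul_qs8_sketch(int8_t a, int8_t b, int n, int fp)
{
    const int32_t product = (static_cast<int32_t>(a) * static_cast<int32_t>(b)) >> fp;
    const int32_t scaled  = product >> n;
    return static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, scaled)));
}
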
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index 7877cf5cc0..e586b72d30 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -25,7 +25,6 @@
#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEAsymm.h"
@@ -79,32 +78,6 @@ inline float calculate_avg_scale(const Coordinates &id, const int pool_size_x, c
return 1.f / ((end_y - start_y) * (end_x - start_x));
}
-inline qint8_t calculate_avg_scale_q8(const Coordinates &id, int pool_size, int upper_bound_w, int upper_bound_h,
- int pad_x, int pad_y, int stride_x, int stride_y, int fixed_point_position)
-{
- static const std::array<qint8_t, 10> scale_values_q8 =
- { { 0x0, 0x0, 0x40, 0x2A, 0x20, 0x19, 0x15, 0x12, 0x10, 0xE } };
- const int start_x = id.x() * stride_x - pad_x;
- const int start_y = id.y() * stride_y - pad_y;
- const int end_x = std::min(start_x + pool_size, upper_bound_w);
- const int end_y = std::min(start_y + pool_size, upper_bound_h);
- const int val = ((end_y - start_y) * (end_x - start_x));
- return sshr_qs8(scale_values_q8[val], (7 - fixed_point_position));
-}
-
-inline qint16_t calculate_avg_scale_q16(const Coordinates &id, int pool_size, int upper_bound_w, int upper_bound_h,
- int pad_x, int pad_y, int stride_x, int stride_y, int fixed_point_position)
-{
- static std::array<qint16_t, 10> scale_values_q16 =
- { { 0x0, 0x0, 0x4000, 0x2AAB, 0x2000, 0x199A, 0x1555, 0x1249, 0x1000, 0xE38 } };
- const int start_x = id.x() * stride_x - pad_x;
- const int start_y = id.y() * stride_y - pad_y;
- const int end_x = std::min(start_x + pool_size, upper_bound_w);
- const int end_y = std::min(start_y + pool_size, upper_bound_h);
- const int val = ((end_y - start_y) * (end_x - start_x));
- return sshr_qs16(scale_values_q16[val], (15 - fixed_point_position));
-}
-
template <bool exclude_padding>
inline void scale_vector_s16x8(uint16x8_t &v, const Coordinates &id, int id_offset, int step,
const int pool_size, const int upper_bound_w, const int upper_bound_h,
@@ -163,22 +136,18 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
int pool_stride_y = 0;
PoolingType pool_type = pool_info.pool_type();
const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
- const bool exclude_padding = pool_info.exclude_padding();
std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
static const std::set<int> supported_pool_sizes = { 2, 3 };
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_quantized(input->data_type()));
ARM_COMPUTE_RETURN_ERROR_ON((supported_pool_sizes.find(pool_size_x) == supported_pool_sizes.end()) && ((input->data_type() != DataType::F32) && (input->data_type() != DataType::QASYMM8))
&& (pool_type != PoolingType::MAX));
- ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_fixed_point(input->data_type()) && pool_stride_x > 2);
- ARM_COMPUTE_RETURN_ERROR_ON(exclude_padding && is_data_type_fixed_point(input->data_type()));
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
|| (output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));
@@ -236,22 +205,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
{
switch(input->data_type())
{
- case DataType::QS8:
- num_elems_read_per_iteration = 16;
- switch(pool_size_x)
- {
- case 2:
- num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
- num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15;
- break;
- case 3:
- num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
- num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14;
- break;
- default:
- break;
- }
- break;
case DataType::QASYMM8:
if(is_nhwc)
{
@@ -274,22 +227,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
break;
}
break;
- case DataType::QS16:
- num_elems_read_per_iteration = 8;
- switch(pool_size_x)
- {
- case 2:
- num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8;
- num_elems_processed_per_iteration = (pool_stride_x == 2) ? 4 : 7;
- break;
- case 3:
- num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8;
- num_elems_processed_per_iteration = (pool_stride_x == 2) ? 3 : 6;
- break;
- default:
- break;
- }
- break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
if(is_nhwc)
@@ -462,64 +399,7 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
const DataType data_type = input->info()->data_type();
const bool is_nchw = data_layout == DataLayout::NCHW;
- // Select appropriate function
- if(data_type == DataType::QS8)
- {
- if(_is_square)
- {
- switch(pool_size_x)
- {
- case 2:
- switch(pool_type)
- {
- case PoolingType::AVG:
- _func = &NEPoolingLayerKernel::pooling2_q8_nchw<PoolingType::AVG>;
- break;
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::pooling2_q8_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- break;
- case 3:
- switch(pool_type)
- {
- case PoolingType::AVG:
- _func = &NEPoolingLayerKernel::pooling3_q8_nchw<PoolingType::AVG>;
- break;
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::pooling3_q8_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- break;
- default:
- switch(pool_type)
- {
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::poolingMxN_q8_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- break;
- }
- }
- else
- {
- switch(pool_type)
- {
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::poolingMxN_q8_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- }
- }
- else if(data_type == DataType::QASYMM8)
+ if(data_type == DataType::QASYMM8)
{
if(pool_size_x == 2 && pool_stride_x < 3 && _is_square)
{
@@ -606,62 +486,6 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
}
}
}
- else if(data_type == DataType::QS16)
- {
- if(_is_square)
- {
- switch(pool_size_x)
- {
- case 2:
- switch(pool_type)
- {
- case PoolingType::AVG:
- _func = &NEPoolingLayerKernel::pooling2_q16_nchw<PoolingType::AVG>;
- break;
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::pooling2_q16_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- break;
- case 3:
- switch(pool_type)
- {
- case PoolingType::AVG:
- _func = &NEPoolingLayerKernel::pooling3_q16_nchw<PoolingType::AVG>;
- break;
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::pooling3_q16_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- break;
- default:
- switch(pool_type)
- {
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::poolingMxN_q16_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- break;
- }
- }
- else
- {
- switch(pool_type)
- {
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::poolingMxN_q16_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- }
- }
else if(data_type == DataType::F16)
{
if(_is_square)
@@ -1022,71 +846,6 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
INEKernel::configure(win_config.second);
}
-template <PoolingType pooling_type>
-void NEPoolingLayerKernel::pooling2_q8_nchw(const Window &window_input, const Window &window)
-{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- const int fixed_point_position = _input->info()->fixed_point_position();
- constexpr int pool_size = 2;
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
- const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
- const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
- const int upper_bound_w = _input->info()->dimension(0) + pool_pad_right;
- const int upper_bound_h = _input->info()->dimension(1) + pool_pad_bottom;
-
- const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
- const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto top_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_top_ptr + input.offset()));
- const auto bottom_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_bottom_ptr + input.offset()));
- qint8x8_t lower_res = {};
- qint8x8_t upper_res = {};
- if(pooling_type == PoolingType::AVG)
- {
- // Calculate scale
- const qint8_t scale = calculate_avg_scale_q8(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y, fixed_point_position);
- const qint8x8_t scale_vec = vdup_n_qs8(scale);
-
- // Perform pooling
- const qint8x16_t sum_data = vqaddq_qs8(top_data, bottom_data);
- lower_res = vqmul_qs8(vpadd_s8(vget_low_s8(sum_data), vget_high_s8(sum_data)), scale_vec, fixed_point_position);
- if(pool_stride_x == 1)
- {
- const qint8x16_t sum_data_shifted = vextq_s8(sum_data, sum_data, 1);
- upper_res = vqmul_qs8(vpadd_s8(vget_low_s8(sum_data_shifted), vget_high_s8(sum_data_shifted)), scale_vec, fixed_point_position);
- }
- }
- else
- {
- const qint8x16_t max_data = vmaxq_s8(top_data, bottom_data);
- lower_res = vpmax_s8(vget_low_s8(max_data), vget_high_s8(max_data));
- if(pool_stride_x == 1)
- {
- const qint8x16_t max_data_shifted = vextq_s8(max_data, max_data, 1);
- upper_res = vpmax_s8(vget_low_s8(max_data_shifted), vget_high_s8(max_data_shifted));
- }
- }
- if(pool_stride_x == 1)
- {
- const qint8x8x2_t res = { { lower_res, upper_res } };
- vst2_s8(reinterpret_cast<qint8_t *>(output.ptr()), res);
- }
- else
- {
- vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), lower_res);
- }
- },
- input, output);
-}
-
template <PoolingType pooling_type, bool exclude_padding>
void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, const Window &window)
{
@@ -1201,71 +960,6 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con
input, output);
}
-template <PoolingType pooling_type>
-void NEPoolingLayerKernel::pooling2_q16_nchw(const Window &window_input, const Window &window)
-{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- const int fixed_point_position = _input->info()->fixed_point_position();
- constexpr int pool_size = 2;
- const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
- const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
- const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
- const int upper_bound_w = _input->info()->dimension(0) + pool_pad_right;
- const int upper_bound_h = _input->info()->dimension(1) + pool_pad_bottom;
-
- const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
- const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto top_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_top_ptr + input.offset()));
- const auto bottom_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_bottom_ptr + input.offset()));
- qint16x4_t lower_res = {};
- qint16x4_t upper_res = {};
- if(pooling_type == PoolingType::AVG)
- {
- // Calculate scale
- const qint16_t scale = calculate_avg_scale_q16(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y, fixed_point_position);
- const qint16x4_t scale_vec = vdup_n_qs16(scale);
-
- // Perform pooling
- const qint16x8_t sum_data = vqaddq_qs16(top_data, bottom_data);
- lower_res = vqmul_qs16(vpadd_s16(vget_low_s16(sum_data), vget_high_s16(sum_data)), scale_vec, fixed_point_position);
- if(pool_stride_x == 1)
- {
- const qint16x8_t sum_data_shifted = vextq_s16(sum_data, sum_data, 1);
- upper_res = vqmul_qs16(vpadd_s16(vget_low_s16(sum_data_shifted), vget_high_s16(sum_data_shifted)), scale_vec, fixed_point_position);
- }
- }
- else
- {
- const qint16x8_t max_data = vmaxq_s16(top_data, bottom_data);
- lower_res = vpmax_s16(vget_low_s16(max_data), vget_high_s16(max_data));
- if(pool_stride_x == 1)
- {
- const qint16x8_t max_data_shifted = vextq_s16(max_data, max_data, 1);
- upper_res = vpmax_s16(vget_low_s16(max_data_shifted), vget_high_s16(max_data_shifted));
- }
- }
- if(pool_stride_x == 1)
- {
- const qint16x4x2_t res = { { lower_res, upper_res } };
- vst2_s16(reinterpret_cast<qint16_t *>(output.ptr()), res);
- }
- else
- {
- vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), lower_res);
- }
- },
- input, output);
-}
-
template <PoolingType pooling_type, bool exclude_padding>
void NEPoolingLayerKernel::pooling3_f16_nchw(const Window &window_input, const Window &window)
{
@@ -1461,82 +1155,6 @@ void NEPoolingLayerKernel::pooling2_f32_nchw(const Window &window_input, const W
input, output);
}
-template <PoolingType pooling_type>
-void NEPoolingLayerKernel::pooling3_q8_nchw(const Window &window_input, const Window &window)
-{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- const int fixed_point_position = _input->info()->fixed_point_position();
- constexpr int pool_size = 3;
- const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
- const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
- const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
- const int upper_bound_w = _input->info()->dimension(0) + pool_pad_right;
- const int upper_bound_h = _input->info()->dimension(1) + pool_pad_bottom;
-
- const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
- const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
- const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto top_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_top_ptr + input.offset()));
- const auto middle_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_middle_ptr + input.offset()));
- const auto bottom_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_bottom_ptr + input.offset()));
- qint8x8_t res = {};
- if(pooling_type == PoolingType::AVG)
- {
- // Calculate scale
- const qint8_t scale = calculate_avg_scale_q8(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y, fixed_point_position);
-
- // Perform pooling for stride 2
- const qint8x16_t sum_data = vqaddq_qs8(vqaddq_qs8(top_data, bottom_data), middle_data);
- const qint8x16_t sum_data2 = vextq_s8(sum_data, sum_data, 1);
- const qint8x16_t sum_data3 = vextq_s8(sum_data, sum_data, 2);
- const qint8x16_t final_sum = vqaddq_qs8(vqaddq_qs8(sum_data, sum_data2), sum_data3);
- if(pool_stride_x == 2)
- {
- const qint8x8x2_t table = { { vget_low_s8(final_sum), vget_high_s8(final_sum) } };
- static const qint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
- const qint8x8_t scale_vec = vdup_n_qs8(scale);
- res = vtbl2_s8(table, lookup_val);
- res = vqmul_qs8(res, scale_vec, fixed_point_position);
- vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), res);
- }
- else
- {
- const qint8x16_t scale_vec = vdupq_n_qs8(scale);
- vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqmulq_qs8(final_sum, scale_vec, fixed_point_position));
- }
- }
- else
- {
- const qint8x16_t max_data = vmaxq_s8(vmaxq_s8(top_data, bottom_data), middle_data);
- const qint8x16_t max_data2 = vextq_s8(max_data, max_data, 1);
- const qint8x16_t max_data3 = vextq_s8(max_data, max_data, 2);
- const qint8x16_t final_max = vmaxq_s8(vmaxq_s8(max_data, max_data2), max_data3);
-
- if(pool_stride_x == 2)
- {
- const qint8x8x2_t table = { { vget_low_s8(final_max), vget_high_s8(final_max) } };
- static const qint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
- res = vtbl2_s8(table, lookup_val);
- vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), res);
- }
- else
- {
- vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), final_max);
- }
- }
- },
- input, output);
-}
-
template <PoolingType pooling_type, bool exclude_padding>
void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, const Window &window)
{
@@ -1657,77 +1275,6 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con
input, output);
}
-template <PoolingType pooling_type>
-void NEPoolingLayerKernel::pooling3_q16_nchw(const Window &window_input, const Window &window)
-{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- const int fixed_point_position = _input->info()->fixed_point_position();
- constexpr int pool_size = 3;
- const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
- const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
- const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
- const int upper_bound_w = _input->info()->dimension(0) + pool_pad_right;
- const int upper_bound_h = _input->info()->dimension(1) + pool_pad_bottom;
-
- const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
- const unsigned char *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
- const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto top_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_top_ptr + input.offset()));
- const auto middle_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_middle_ptr + input.offset()));
- const auto bottom_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_bottom_ptr + input.offset()));
-
- if(pooling_type == PoolingType::AVG)
- {
- // Calculate scale
- const qint16_t scale = calculate_avg_scale_q16(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y, fixed_point_position);
-
- // Perform pooling for stride 2
- const qint16x8_t sum_data = vqaddq_qs16(vqaddq_qs16(top_data, bottom_data), middle_data);
- const qint16x8_t sum_data2 = vextq_s16(sum_data, sum_data, 1);
- const qint16x8_t sum_data3 = vextq_s16(sum_data, sum_data, 2);
- const qint16x8_t final_sum = vqaddq_qs16(vqaddq_qs16(sum_data, sum_data2), sum_data3);
- if(pool_stride_x == 2)
- {
- const qint16x4_t tmp = { vgetq_lane_s16(final_sum, 0), vgetq_lane_s16(final_sum, 2), vgetq_lane_s16(final_sum, 4), vgetq_lane_s16(final_sum, 6) };
- const qint16x4_t scale_vec = vdup_n_qs16(scale);
- vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqmul_qs16(tmp, scale_vec, fixed_point_position));
- }
- else
- {
- const qint16x8_t scale_vec = vdupq_n_qs16(scale);
- vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqmulq_qs16(final_sum, scale_vec, fixed_point_position));
- }
- }
- else
- {
- const qint16x8_t max_data = vmaxq_s16(vmaxq_s16(top_data, bottom_data), middle_data);
- const qint16x8_t max_data2 = vextq_s16(max_data, max_data, 1);
- const qint16x8_t max_data3 = vextq_s16(max_data, max_data, 2);
- const qint16x8_t final_max = vmaxq_s16(vmaxq_s16(max_data, max_data2), max_data3);
-
- if(pool_stride_x == 2)
- {
- const qint16x4_t tmp = { vgetq_lane_s16(final_max, 0), vgetq_lane_s16(final_max, 2), vgetq_lane_s16(final_max, 4), vgetq_lane_s16(final_max, 6) };
- vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), tmp);
- }
- else
- {
- vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), final_max);
- }
- }
- },
- input, output);
-}
-
template <PoolingType pooling_type, bool exclude_padding>
void NEPoolingLayerKernel::pooling3_f32_nchw(const Window &window_input, const Window &window)
{
@@ -1879,110 +1426,6 @@ void NEPoolingLayerKernel::pooling7_f32_nchw(const Window &window_input, const W
input, output);
}
-template <PoolingType pooling_type>
-void NEPoolingLayerKernel::poolingMxN_q8_nchw(const Window &window_input, const Window &window)
-{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
- const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
- const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- qint8x16_t vres = {};
- qint8_t res = {};
-
- //PoolingType::MAX
- for(int y = 0; y < pool_size_y; ++y)
- {
- int x = 0;
- for(; x <= (pool_size_x - 16); x += 16)
- {
- const qint8x16_t data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
- (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
- vres = vmaxq_s8(vres, data);
- }
-
- // Leftover for loop
- for(; x < pool_size_x; ++x)
- {
- qint8_t data = *(reinterpret_cast<const qint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
- res = std::max(res, data);
- }
- }
- //Reduce
- const qint8x8_t half_vres = vpmax_s8(vget_low_s8(vres), vget_high_s8(vres));
- res = std::max(res, vget_lane_s8(half_vres, 0));
- res = std::max(res, vget_lane_s8(half_vres, 1));
- res = std::max(res, vget_lane_s8(half_vres, 2));
- res = std::max(res, vget_lane_s8(half_vres, 3));
- res = std::max(res, vget_lane_s8(half_vres, 4));
- res = std::max(res, vget_lane_s8(half_vres, 5));
- res = std::max(res, vget_lane_s8(half_vres, 6));
- res = std::max(res, vget_lane_s8(half_vres, 7));
-
- // Store result
- *(reinterpret_cast<qint8_t *>(output.ptr())) = res;
- },
- input, output);
-}
-
-template <PoolingType pooling_type>
-void NEPoolingLayerKernel::poolingMxN_q16_nchw(const Window &window_input, const Window &window)
-{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
- const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
- const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- qint16x8_t vres = {};
- qint16_t res = {};
-
- //PoolingType::MAX
- for(int y = 0; y < pool_size_y; ++y)
- {
- int x = 0;
- for(; x <= (pool_size_x - 8); x += 8)
- {
- const qint16x8_t data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
- (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
- vres = vmaxq_s16(vres, data);
- }
-
- // Leftover for loop
- for(; x < pool_size_x; ++x)
- {
- qint16_t data = *(reinterpret_cast<const qint16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
- res = std::max(res, data);
- }
- }
- //Reduce
- const qint16x4_t half_vres = vpmax_s16(vget_low_s16(vres), vget_high_s16(vres));
- res = std::max(res, vget_lane_s16(half_vres, 0));
- res = std::max(res, vget_lane_s16(half_vres, 1));
- res = std::max(res, vget_lane_s16(half_vres, 2));
- res = std::max(res, vget_lane_s16(half_vres, 3));
-
- // Store result
- *(reinterpret_cast<qint16_t *>(output.ptr())) = res;
- },
- input, output);
-}
-
template <PoolingType pooling_type, bool exclude_padding>
void NEPoolingLayerKernel::poolingMxN_f16_nchw(const Window &window_input, const Window &window)
{
@@ -2688,8 +2131,6 @@ void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
unsigned int window_x_inc = 0;
switch(_input->info()->data_type())
{
- case DataType::QS8:
- case DataType::QS16:
case DataType::F16:
{
window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
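
The deleted calculate_avg_scale_q8 helper encoded the average-pooling scale 1/area as a Q0.7 lookup table and then shifted it into the tensor's fixed-point position via sshr_qs8(entry, 7 - fixed_point_position); the table entries are simply 128 / area in integer arithmetic. A small sketch that regenerates them, for illustration only:

#include <cstdio>

// Hedged sketch: reproduce the removed scale_values_q8 table. Entry 'area'
// holds 1/area in Q0.7, i.e. floor(2^7 / area), which matches the literal
// values 0x40, 0x2A, 0x20, 0x19, 0x15, 0x12, 0x10, 0x0E for areas 2..9.
int main()
{
    for (int area = 2; area <= 9; ++area)
    {
        std::printf("area=%d -> 0x%02X\n", area, 128 / area);
    }
    return 0;
}
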
diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
index ee23e76c5c..b49400ab7d 100644
--- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
@@ -54,7 +54,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *min_max)
{
// Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::U8, 0);
+ auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::U8);
constexpr unsigned int num_elems_processed_per_iteration = 8;
diff --git a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp
index a209a523d3..4d908db77b 100644
--- a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,7 +51,7 @@ void NEROIPoolingLayerKernel::configure(const ITensor *input, const IROIArray *r
// Output auto inizialitation if not yet initialized
TensorShape output_shape(pool_info.pooled_width(), pool_info.pooled_height(), input->info()->dimension(2), rois->num_values());
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) != pool_info.pooled_width()) || (output->info()->dimension(1) != pool_info.pooled_height()));
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index 30d42fa25f..30f21bbf33 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -134,7 +134,7 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
const TensorShape output_shape = calculate_output_shape(input->tensor_shape(), axis);
// Output auto initialization if not yet initialized
- auto_init_if_empty(*output, output_shape, 1, input->data_type(), input->fixed_point_position());
+ auto_init_if_empty(*output, output_shape, 1, input->data_type());
unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->data_type());
diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp
index 45ba68d9fa..d6f470445f 100644
--- a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp
@@ -59,11 +59,10 @@ inline void reshape_tensor(const Window &window, const ITensor *input, ITensor *
void NEReshapeLayerKernel::configure(const ITensor *input, ITensor *output)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::QS8, DataType::U16, DataType::S16, DataType::QS16,
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
ARM_COMPUTE_ERROR_ON(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size());
_input = input;
@@ -94,12 +93,10 @@ void NEReshapeLayerKernel::run(const Window &window, const ThreadInfo &info)
case DataType::U8:
case DataType::S8:
case DataType::QASYMM8:
- case DataType::QS8:
reshape_tensor<uint8_t>(window, _input, _output);
break;
case DataType::U16:
case DataType::S16:
- case DataType::QS16:
case DataType::F16:
reshape_tensor<uint16_t>(window, _input, _output);
break;
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index d91efd267f..9946f002de 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -194,56 +194,7 @@ T sqadd(T a, T b);
template <typename T>
T sqsub(T a, T b);
template <typename T>
-T sqmul(T a, T b, int fixed_point_position);
-
-#define DECLARE_NEON_FUNCTIONS_FOR_FIXED_POINT(TYPET, TYPEU, TAGT, TAGU) \
- inline vec_8_byte_t<TYPET> vqsub(vec_8_byte_t<TYPET> a, vec_8_byte_t<TYPET> b) \
- { \
- return vqsub_##TAGT(a, b); \
- } \
- inline vec_8_byte_t<TYPEU> vqadd(vec_8_byte_t<TYPEU> a, vec_8_byte_t<TYPEU> b) \
- { \
- return vqadd_##TAGU(a, b); \
- } \
- inline vec_16_byte_t<TYPEU> vqadd(vec_16_byte_t<TYPEU> a, vec_16_byte_t<TYPEU> b) \
- { \
- return vqaddq_##TAGU(a, b); \
- } \
- inline vec_8_byte_t<TYPET> vqexp(vec_8_byte_t<TYPET> vec, int fixed_point_position) \
- { \
- return vqexp_q##TAGT(vec, fixed_point_position); \
- } \
- inline auto vmovl(vec_8_byte_t<TYPET> vec)->decltype(vmovl_##TAGT(vec)) \
- { \
- return vmovl_##TAGT(vec); \
- } \
- inline vec_16_byte_t<TYPET> vqrecip(vec_16_byte_t<TYPET> vec, int fixed_point_position) \
- { \
- return vqrecipq_q##TAGT(vec, fixed_point_position); \
- } \
- inline vec_16_byte_t<TYPET> vqmul(vec_16_byte_t<TYPET> a, vec_16_byte_t<TYPET> b, int fixed_point_position) \
- { \
- return vqmulq_q##TAGT(a, b, fixed_point_position); \
- } \
- template <> \
- inline TYPEU sqadd<TYPEU>(TYPEU a, TYPEU b) \
- { \
- return sqadd_q##TAGU(a, b); \
- } \
- inline TYPET sqexp(TYPET val, int fixed_point_position) \
- { \
- return sqexp_q##TAGT(val, fixed_point_position); \
- } \
- template <> \
- inline TYPET sqsub<TYPET>(TYPET a, TYPET b) \
- { \
- return sqsub_q##TAGT(a, b); \
- } \
- template <> \
- inline TYPET sqmul<TYPET>(TYPET a, TYPET b, int fixed_point_position) \
- { \
- return sqmul_q##TAGT(a, b, fixed_point_position); \
- }
+T sqmul(T a, T b);
#define DECLARE_NEON_FUNCTIONS_FOR_FLOAT(TYPE, TAG) \
inline vec_8_byte_t<TYPE> vadd(vec_8_byte_t<TYPE> a, vec_8_byte_t<TYPE> b) \
@@ -278,9 +229,6 @@ DECLARE_NEON_FUNCTIONS_FOR_TYPE(float16_t, f16)
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
DECLARE_NEON_FUNCTIONS_FOR_TYPE(float, f32)
-DECLARE_NEON_FUNCTIONS_FOR_FIXED_POINT(int8_t, int16_t, s8, s16)
-DECLARE_NEON_FUNCTIONS_FOR_FIXED_POINT(int16_t, int32_t, s16, s32)
-
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
DECLARE_NEON_FUNCTIONS_FOR_FLOAT(float16_t, f16)
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
@@ -373,16 +321,15 @@ namespace
Status validate_arguments_logits_1d_max(const ITensorInfo &input, const ITensorInfo &output)
{
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::F32);
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
// Validate in case of configured output
if(output.total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(&input, &output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&input, &output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output.tensor_shape(), TensorShape(input.tensor_shape()).set(0, 1));
}
@@ -395,7 +342,7 @@ std::pair<Status, Window> validate_and_configure_window_logits_1d_max(ITensorInf
// Softmax across the x dimension
const TensorShape output_shape = TensorShape(input.tensor_shape()).set(0, 1);
// Output auto initialization if not yet initialized
- auto_init_if_empty(output, output_shape, 1, input.data_type(), input.fixed_point_position(), input.quantization_info());
+ auto_init_if_empty(output, output_shape, 1, input.data_type(), input.quantization_info());
// Configure kernel window
const int input_width = input.valid_region().shape.x();
@@ -488,12 +435,6 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output)
case DataType::QASYMM8:
_func = &logits_1d_max<qasymm8_t>;
break;
- case DataType::QS8:
- _func = &logits_1d_max<qint8_t>;
- break;
- case DataType::QS16:
- _func = &logits_1d_max<qint16_t>;
- break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
_func = &logits_1d_max<float16_t>;
@@ -543,11 +484,12 @@ namespace
Status validate_arguments_logits_softmax(const ITensorInfo &input, const ITensorInfo &max,
const ITensorInfo &output, const float beta, const ITensorInfo &tmp)
{
+ ARM_COMPUTE_UNUSED(beta);
// Check input
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::F32);
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(input.data_type());
@@ -555,7 +497,6 @@ Status validate_arguments_logits_softmax(const ITensorInfo &input, const ITensor
// Check max
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &max);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(TensorShape(input.tensor_shape()).set(0, 1), max.tensor_shape());
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(&input, &max);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&input, &max);
// Check output if configured
@@ -564,19 +505,14 @@ Status validate_arguments_logits_softmax(const ITensorInfo &input, const ITensor
const QuantizationInfo output_quantization = is_quantized_asymmetric ? QuantizationInfo(1.f / 256.f, 0) : output.quantization_info();
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&input, &output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(&input, &output);
ARM_COMPUTE_RETURN_ERROR_ON(output.quantization_info() != output_quantization);
}
- // Check beta
- ARM_COMPUTE_RETURN_ERROR_ON((beta != 1.0f) && is_data_type_fixed_point(input.data_type()));
-
// Check tmp if configured
if(tmp.total_size() != 0)
{
const DataType tmp_data_type = is_quantized_asymmetric ? DataType::F32 : input.data_type();
ARM_COMPUTE_RETURN_ERROR_ON(tmp.data_type() != tmp_data_type);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(&input, &tmp);
// We could potentially reduce tmp memory if we could predict or make an assumption
// on the maximum number of threads that will run in parallel.
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&input, &tmp);
@@ -727,88 +663,6 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons
in_it, max_it, out_it);
}
-template <typename T, typename U>
-void logits_1d_softmax_fixed_point(const ITensor &in, const ITensor &max, void *const tmp,
- ITensor &out, const float /*beta*/, const Window &window)
-{
- const int start_x = in.info()->valid_region().anchor.x();
- const int input_width = in.info()->valid_region().shape.x();
-
- const int fixed_point_position = in.info()->fixed_point_position();
-
- Iterator in_it(&in, window);
- Iterator max_it(&max, window);
- Iterator out_it(&out, window);
-
- execute_window_loop(window, [&](const Coordinates &)
- {
- /* Get pointers */
- const auto in_ptr = reinterpret_cast<const T *>(in_it.ptr()) + start_x;
- const auto out_ptr = reinterpret_cast<T *>(out_it.ptr()) + start_x;
- const auto tmp_ptr = reinterpret_cast<T *>(tmp);
-
- vec_16_byte_t<T> vec_sum_inversed;
-
- /* Compute exponentials and sum */
- {
- /* Get max value */
- const auto max_val = *reinterpret_cast<const T *>(max_it.ptr());
- const auto vec_max = vdup_n<vec_8_byte_t<T>>(max_val);
-
- /* Init sum to zero */
- auto vec_sum = vdup_n<vec_16_byte_t<U>>(0);
-
- /* Loop over row and compute exponentials and sum */
- int i = 0;
- constexpr int vec_size = vec_size_of(vec_sum);
- for(; i <= (input_width - vec_size); i += vec_size)
- {
- auto vec_elements = vld<vec_8_byte_t<T>>(in_ptr + i);
- vec_elements = vqsub(vec_elements, vec_max);
- vec_elements = vqexp(vec_elements, fixed_point_position);
- vec_sum = vqadd(vec_sum, vmovl(vec_elements));
- vst(tmp_ptr + i, vec_elements);
- }
- /* Reduce sum */
- const vec_8_byte_t<U> sum_8_byte = vqadd(vget_high(vec_sum), vget_low(vec_sum));
- U sum = reduce_add(sqadd<U>, sum_8_byte);
-
- /* Run remaining elements */
- for(; i < input_width; ++i)
- {
- T element = sqexp(sqsub(in_ptr[i], max_val), fixed_point_position);
- sum = sqadd<U>(sum, element);
- tmp_ptr[i] = element;
- }
-
- const auto qsum = utility::saturate_cast<T>(sum);
- vec_sum_inversed = vqrecip(vdup_n<vec_16_byte_t<T>>(qsum), fixed_point_position);
- }
-
- /* Normalize exponentials */
- {
- /* Loop over row and compute softmax */
- int i = 0;
- constexpr int vec_size = vec_size_of(vec_sum_inversed);
- for(; i <= (input_width - vec_size); i += vec_size)
- {
- const auto vec_in = vld<vec_16_byte_t<T>>(tmp_ptr + i);
- const vec_16_byte_t<T> normalized_value = vqmul(vec_in, vec_sum_inversed, fixed_point_position);
- vst(out_ptr + i, normalized_value);
- }
-
- const T sum_inversed = vget_lane<0>(vec_sum_inversed);
-
- /* Run remaining elements */
- for(; i < input_width; ++i)
- {
- out_ptr[i] = sqmul(tmp_ptr[i], sum_inversed, fixed_point_position);
- }
- }
- },
- in_it, max_it, out_it);
-}
-
template <typename T>
void logits_1d_softmax_float(const ITensor &in, const ITensor &max, void *const tmp,
ITensor &out, const float beta, const Window &window)
@@ -908,12 +762,6 @@ void NELogits1DSoftmaxKernel::configure(const ITensor *input, const ITensor *max
case DataType::QASYMM8:
_func = &logits_1d_softmax_qasymm8;
break;
- case DataType::QS8:
- _func = &logits_1d_softmax_fixed_point<qint8_t, qint16_t>;
- break;
- case DataType::QS16:
- _func = &logits_1d_softmax_fixed_point<qint16_t, qint32_t>;
- break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
_func = &logits_1d_softmax_float<float16_t>;
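(Editorial note, not part of the patch: with the fixed-point softmax specialization above removed, only the QASYMM8 and floating-point paths remain. The float path keeps the usual max-subtract, exponentiate, accumulate and normalize recipe. The scalar sketch below is illustrative only; it assumes a non-empty, contiguous row of floats and is not the library's vectorized NEON implementation.)

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Illustrative scalar softmax over one row; the real kernel vectorizes this
// with NEON and iterates over a Window.
std::vector<float> softmax_row(const std::vector<float> &in, float beta = 1.f)
{
    // Subtract the row maximum before exponentiating for numerical stability.
    const float max_val = *std::max_element(in.begin(), in.end());

    std::vector<float> out(in.size());
    float sum = 0.f;
    for(std::size_t i = 0; i < in.size(); ++i)
    {
        out[i] = std::exp((in[i] - max_val) * beta);
        sum += out[i];
    }
    // Normalize so the row sums to 1.
    for(float &v : out)
    {
        v /= sum;
    }
    return out;
}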
diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp
index 92271378ff..2630159561 100644
--- a/src/core/NEON/kernels/NETransposeKernel.cpp
+++ b/src/core/NEON/kernels/NETransposeKernel.cpp
@@ -74,7 +74,7 @@ unsigned int num_elems_processed(size_t element_size)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::QS16, DataType::U32, DataType::S32,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::U32, DataType::S32,
DataType::F16,
DataType::F32);
@@ -84,7 +84,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
index 3031a87637..f398409b26 100644
--- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
+++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
@@ -105,14 +105,13 @@ TensorShape get_output_shape(const ITensorInfo *input, bool has_bias)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
if(biases != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input->data_type()));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
ARM_COMPUTE_RETURN_ERROR_ON((input->num_dimensions() == 4) && (biases->num_dimensions() != 1));
ARM_COMPUTE_RETURN_ERROR_ON((input->num_dimensions() == 5) && (biases->num_dimensions() != 2));
ARM_COMPUTE_RETURN_ERROR_ON((input->num_dimensions() == 4) && (biases->dimension(0) != input->tensor_shape()[3]));
@@ -124,7 +123,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, c
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), get_output_shape(input, biases != nullptr));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
}
return Status{};
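(Editorial note, not part of the patch: throughout this commit the supported-type lists shrink to QASYMM8 / F16 / F32 and the MISMATCHING_FIXED_POINT checks disappear. A minimal sketch of the resulting validate() idiom follows; the header names are assumptions, and the real kernels add shape, bias and layout checks on top.)

#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Validate.h"

using namespace arm_compute;

// Sketch of the macro-based validation style used by the kernels in this patch.
Status validate_example(const ITensorInfo *input, const ITensorInfo *output)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
    return Status{};
}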
diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp
index 676938a231..b77a47e3e1 100644
--- a/src/core/TensorInfo.cpp
+++ b/src/core/TensorInfo.cpp
@@ -33,8 +33,8 @@
using namespace arm_compute;
TensorInfo::TensorInfo()
- : _total_size(0), _fixed_point_position(0), _offset_first_element_in_bytes(0), _strides_in_bytes(), _num_channels(0), _tensor_shape(), _data_type(DataType::UNKNOWN), _format(Format::UNKNOWN),
- _is_resizable{ true }, _valid_region{ Coordinates(), _tensor_shape }, _padding{ 0 }, _quantization_info(), _data_layout(DataLayout::NCHW)
+ : _total_size(0), _offset_first_element_in_bytes(0), _strides_in_bytes(), _num_channels(0), _tensor_shape(), _data_type(DataType::UNKNOWN), _format(Format::UNKNOWN), _is_resizable{ true },
+ _valid_region{ Coordinates(), _tensor_shape }, _padding{ 0 }, _quantization_info(), _data_layout(DataLayout::NCHW)
{
}
@@ -42,7 +42,6 @@ TensorInfo::TensorInfo(const ITensorInfo &info)
: TensorInfo()
{
_total_size = info.total_size();
- _fixed_point_position = info.fixed_point_position();
_offset_first_element_in_bytes = info.offset_first_element_in_bytes();
_strides_in_bytes = info.strides_in_bytes();
_num_channels = info.num_channels();
@@ -72,22 +71,22 @@ TensorInfo::TensorInfo(const TensorShape &tensor_shape, Format format)
init(tensor_shape, format);
}
-TensorInfo::TensorInfo(size_t num_channels, DataType data_type, size_t fixed_point_position)
+TensorInfo::TensorInfo(size_t num_channels, DataType data_type)
: TensorInfo()
{
- init(TensorShape(), num_channels, data_type, fixed_point_position);
+ init(TensorShape(), num_channels, data_type);
}
-TensorInfo::TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position)
+TensorInfo::TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type)
: TensorInfo()
{
- init(tensor_shape, num_channels, data_type, fixed_point_position);
+ init(tensor_shape, num_channels, data_type);
}
TensorInfo::TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, QuantizationInfo quantization_info)
: TensorInfo()
{
- init(tensor_shape, num_channels, data_type, 0);
+ init(tensor_shape, num_channels, data_type);
_quantization_info = quantization_info;
}
@@ -124,34 +123,28 @@ void TensorInfo::init(const TensorShape &tensor_shape, Format format,
_format = format;
}
-void TensorInfo::init(size_t num_channels, DataType data_type, size_t fixed_point_position)
+void TensorInfo::init(size_t num_channels, DataType data_type)
{
- init(TensorShape(), num_channels, data_type, fixed_point_position);
+ init(TensorShape(), num_channels, data_type);
}
-void TensorInfo::init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position)
+void TensorInfo::init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type)
{
ARM_COMPUTE_ERROR_ON(num_channels == 0);
- ARM_COMPUTE_ERROR_ON(data_type == DataType::QS8 && (fixed_point_position < 1 || fixed_point_position > 6));
- ARM_COMPUTE_ERROR_ON(data_type == DataType::QS16 && (fixed_point_position < 1 || fixed_point_position > 14));
- _fixed_point_position = fixed_point_position;
- _data_type = data_type;
- _num_channels = num_channels;
- _format = Format::UNKNOWN;
+ _data_type = data_type;
+ _num_channels = num_channels;
+ _format = Format::UNKNOWN;
set_tensor_shape(tensor_shape);
}
void TensorInfo::init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type,
const Strides &strides_in_bytes, size_t offset_first_element_in_bytes,
- size_t total_size_in_bytes, int fixed_point_position)
+ size_t total_size_in_bytes)
{
ARM_COMPUTE_ERROR_ON(num_channels == 0);
- ARM_COMPUTE_ERROR_ON(data_type == DataType::QS8 && (fixed_point_position < 1 || fixed_point_position > 6));
- ARM_COMPUTE_ERROR_ON(data_type == DataType::QS16 && (fixed_point_position < 1 || fixed_point_position > 14));
- _fixed_point_position = fixed_point_position;
_data_type = data_type;
_num_channels = num_channels;
_format = Format::UNKNOWN;
@@ -188,17 +181,14 @@ size_t TensorInfo::init_auto_padding(const TensorShape &tensor_shape, Format for
return total_size;
}
-size_t TensorInfo::init_auto_padding(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position)
+size_t TensorInfo::init_auto_padding(const TensorShape &tensor_shape, size_t num_channels, DataType data_type)
{
ARM_COMPUTE_ERROR_ON(num_channels == 0);
- ARM_COMPUTE_ERROR_ON(data_type == DataType::QS8 && (fixed_point_position < 1 || fixed_point_position > 6));
- ARM_COMPUTE_ERROR_ON(data_type == DataType::QS16 && (fixed_point_position < 1 || fixed_point_position > 14));
- _fixed_point_position = fixed_point_position;
- _data_type = data_type;
- _num_channels = num_channels;
- _format = Format::UNKNOWN;
- _tensor_shape = tensor_shape;
+ _data_type = data_type;
+ _num_channels = num_channels;
+ _format = Format::UNKNOWN;
+ _tensor_shape = tensor_shape;
_valid_region = ValidRegion{ Coordinates(), _tensor_shape };
@@ -371,14 +361,6 @@ ITensorInfo &TensorInfo::set_tensor_shape(const TensorShape &shape)
return *this;
}
-ITensorInfo &TensorInfo::set_fixed_point_position(int fixed_point_position)
-{
- ARM_COMPUTE_ERROR_ON(_data_type == DataType::QS8 && (fixed_point_position < 1 || fixed_point_position > 6));
- ARM_COMPUTE_ERROR_ON(_data_type == DataType::QS16 && (fixed_point_position < 1 || fixed_point_position > 14));
- _fixed_point_position = fixed_point_position;
- return *this;
-}
-
ITensorInfo &TensorInfo::set_quantization_info(const QuantizationInfo &quantization_info)
{
_quantization_info = quantization_info;
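(Editorial note, not part of the patch: after this change a TensorInfo is described by shape, number of channels and data type only; quantized tensors carry a QuantizationInfo instead of a fixed-point position. A hedged before/after sketch of a call site, with illustrative values:)

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

// Before this commit a fixed-point position trailed the data type, e.g.
//   TensorInfo info(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3);
// After this commit:
TensorInfo make_float_info()
{
    return TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32);
}

TensorInfo make_quantized_info()
{
    // Quantized tensors pass a QuantizationInfo (scale, offset) instead.
    return TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 256.f, 0));
}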
diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp
index b1c59924a7..11bdbdafe0 100644
--- a/src/core/Utils.cpp
+++ b/src/core/Utils.cpp
@@ -24,8 +24,6 @@
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/FixedPoint.h"
-
#include "support/ToolchainSupport.h"
#include <algorithm>
@@ -145,10 +143,8 @@ const std::string &arm_compute::string_from_data_type(DataType dt)
{ DataType::UNKNOWN, "UNKNOWN" },
{ DataType::S8, "S8" },
{ DataType::U8, "U8" },
- { DataType::QS8, "QS8" },
{ DataType::S16, "S16" },
{ DataType::U16, "U16" },
- { DataType::QS16, "QS16" },
{ DataType::S32, "S32" },
{ DataType::U32, "U32" },
{ DataType::S64, "S64" },
@@ -353,14 +349,12 @@ void arm_compute::print_consecutive_elements(std::ostream &s, DataType dt, const
case DataType::U8:
print_consecutive_elements_impl<uint8_t>(s, ptr, n, stream_width, element_delim);
break;
- case DataType::QS8:
case DataType::S8:
print_consecutive_elements_impl<int8_t>(s, reinterpret_cast<const int8_t *>(ptr), n, stream_width, element_delim);
break;
case DataType::U16:
print_consecutive_elements_impl<uint16_t>(s, reinterpret_cast<const uint16_t *>(ptr), n, stream_width, element_delim);
break;
- case DataType::QS16:
case DataType::S16:
print_consecutive_elements_impl<int16_t>(s, reinterpret_cast<const int16_t *>(ptr), n, stream_width, element_delim);
break;
@@ -388,12 +382,10 @@ int arm_compute::max_consecutive_elements_display_width(std::ostream &s, DataTyp
case DataType::QASYMM8:
case DataType::U8:
return max_consecutive_elements_display_width_impl<uint8_t>(s, ptr, n);
- case DataType::QS8:
case DataType::S8:
return max_consecutive_elements_display_width_impl<int8_t>(s, reinterpret_cast<const int8_t *>(ptr), n);
case DataType::U16:
return max_consecutive_elements_display_width_impl<uint16_t>(s, reinterpret_cast<const uint16_t *>(ptr), n);
- case DataType::QS16:
case DataType::S16:
return max_consecutive_elements_display_width_impl<int16_t>(s, reinterpret_cast<const int16_t *>(ptr), n);
case DataType::U32:
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
index 4c1ea5b9a2..9f3dc78022 100644
--- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
@@ -64,12 +64,10 @@ Status CLDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
const TensorShape output_shape = deconvolution_output_shape(out_dims, input->tensor_shape(), weights->tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output, weights);
if(bias != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, bias);
}
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(), "Output's width is invalid.");
@@ -100,7 +98,7 @@ void CLDeconvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights,
const TensorShape output_shape = deconvolution_output_shape(out_dims, input->info()->tensor_shape(), weights->info()->tensor_shape());
// Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayer::validate(input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), info, inner_border_right, inner_border_top));
@@ -116,7 +114,7 @@ void CLDeconvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights,
const unsigned int out_y = input->info()->dimension(1) + (input->info()->dimension(1) - 1) * (stride_y - 1) + inner_border_top + 2 * info.pad().second;
scale_out_shape.set(0, out_x);
scale_out_shape.set(1, out_y);
- TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type());
_scaled_output.allocator()->init(scale_out_info);
_scale_f.configure(input, &_scaled_output, BorderSize(inner_border_top, inner_border_right), info);
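(Editorial note, not part of the patch: the auto_init_if_empty overload used above now takes shape, number of channels, data type and, where relevant, quantization info. A minimal sketch with the helper signature as it appears in these hunks:)

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorInfo.h"

using namespace arm_compute;

// Initialize an empty output tensor from the input's metadata; the helper is a
// no-op when the output is already configured.
void init_output_like(const ITensorInfo &input, ITensorInfo &output)
{
    auto_init_if_empty(output, input.tensor_shape(), 1, input.data_type(), input.quantization_info());
}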
diff --git a/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp b/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp
index 26d46a438c..0b26f55a29 100644
--- a/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp
@@ -55,7 +55,7 @@ void CLDepthConcatenateLayer::configure(std::vector<ICLTensor *> inputs_vector,
TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
    // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type());
for(unsigned int i = 0; i < _num_inputs; i++)
{
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 9248bc559b..273ef96a03 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -232,7 +232,7 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
{
ARM_COMPUTE_UNUSED(retain_internal_weights);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index ace3379618..f1d2924c92 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -62,7 +62,7 @@ void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const
Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(weights);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
if(biases != nullptr)
@@ -77,7 +77,6 @@ Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, co
if((output != nullptr) && (output->total_size() != 0))
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output);
CLWeightsReshapeKernel::validate(weights, biases, output);
}
@@ -233,7 +232,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
shape_im2col.set(1, conv_w * conv_h);
// FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
- TensorInfo im2col_reshaped_info(shape_im2col, 1, data_type, input->info()->fixed_point_position());
+ TensorInfo im2col_reshaped_info(shape_im2col, 1, data_type);
im2col_reshaped_info.set_quantization_info(input->info()->quantization_info());
_im2col_output.allocator()->init(im2col_reshaped_info);
_memory_group.manage(&_im2col_output);
@@ -257,7 +256,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
// GEMM output should be S32 for acquiring raw integer accumulator without quantized postprocessing for quantized asymmetric input.
const DataType gemm_data_type = _is_quantized ? DataType::S32 : data_type;
// FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
- TensorInfo info_gemm(shape_gemm, 1, gemm_data_type, input->info()->fixed_point_position());
+ TensorInfo info_gemm(shape_gemm, 1, gemm_data_type);
info_gemm.set_quantization_info(output->info()->quantization_info());
_gemm_output.allocator()->init(info_gemm);
_memory_group.manage(&_gemm_output);
@@ -326,10 +325,9 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8 && input->data_layout() == DataLayout::NHWC,
"NHWC is unsupported for QASYMM8!");
@@ -369,7 +367,6 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
}
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels));
ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
}
@@ -395,7 +392,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
    // Output tensor auto initialization if not yet initialized
ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, is_quantized ? nullptr : biases, nullptr));
- weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, append_bias), 1, data_type, weights->fixed_point_position());
+ weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, append_bias), 1, data_type);
weights_to_use = &weights_reshaped_info;
if(!skip_im2col)
@@ -408,7 +405,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
}
shape_im2col.set(0, mat_weights_rows);
shape_im2col.set(1, conv_w * conv_h);
- im2col_reshaped_info = TensorInfo(shape_im2col, 1, data_type, input->fixed_point_position());
+ im2col_reshaped_info = TensorInfo(shape_im2col, 1, data_type);
im2col_reshaped_info.set_quantization_info(input->quantization_info());
ARM_COMPUTE_RETURN_ON_ERROR(CLIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation));
gemm_input_to_use = &im2col_reshaped_info;
@@ -422,7 +419,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
shape_gemm.set(1, conv_w * conv_h);
const DataType gemm_data_type = is_quantized ? DataType::S32 : data_type;
// GEMM output should be S32 for acquiring raw integer accumulator without quantized postprocessing for quantized asymmetric input.
- info_gemm = TensorInfo(shape_gemm, 1, gemm_data_type, input->fixed_point_position());
+ info_gemm = TensorInfo(shape_gemm, 1, gemm_data_type);
info_gemm.set_quantization_info(output->quantization_info());
gemm_output_to_use = &info_gemm;
}
@@ -436,7 +433,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
if(!is_nhwc)
{
- tmp_info = TensorInfo(gemm_output_to_use->tensor_shape(), 1, DataType::QASYMM8, input->fixed_point_position());
+ tmp_info = TensorInfo(gemm_output_to_use->tensor_shape(), 1, DataType::QASYMM8);
tmp_info.set_quantization_info(output->quantization_info());
gemm_output_staged_to_use = &tmp_info;
}
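(Editorial note, not part of the patch: for QASYMM8 the GEMM result stays in S32 and is requantized by the output stage; the float rescale factor input_scale * weights_scale / output_scale is decomposed into an integer multiplier and a right shift, as in the validate hunk above. The sketch below assumes the AsymmHelpers.h header path; the call signature matches the one used in the hunk.)

#include <utility>

#include "arm_compute/core/utils/quantization/AsymmHelpers.h"

using namespace arm_compute;

// Turn the float requantization scale into a fixed-point multiplier plus shift
// for the quantized output stage.
std::pair<int, int> quantized_multiplier_example(float input_scale, float weights_scale, float output_scale)
{
    const float multiplier = input_scale * weights_scale / output_scale;
    int output_multiplier  = 0;
    int output_shift       = 0;
    quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
    return { output_multiplier, output_shift };
}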
diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp
index 3a5133d91f..2a171c3969 100644
--- a/src/runtime/CL/functions/CLReductionOperation.cpp
+++ b/src/runtime/CL/functions/CLReductionOperation.cpp
@@ -71,7 +71,6 @@ Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInf
sums_vector[i].set_data_type(input->data_type());
sums_vector[i].set_tensor_shape(shape);
sums_vector[i].set_num_channels(input->num_channels());
- sums_vector[i].set_fixed_point_position(input->fixed_point_position());
}
// Validate ReductionOperation only on first kernel
@@ -105,7 +104,7 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign
for(unsigned int i = 0; i < _num_of_stages - 1; i++)
{
shape.set(0, ceil(shape.x() / 128.f));
- _sums_vector[i].allocator()->init(TensorInfo(shape, input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()));
+ _sums_vector[i].allocator()->init(TensorInfo(shape, input->info()->num_channels(), input->info()->data_type()));
}
// Apply ReductionOperation only on first kernel
diff --git a/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp b/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp
index d5427819c3..5233ff4f52 100644
--- a/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp
+++ b/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp
@@ -48,7 +48,7 @@ Status CLWidthConcatenateLayer::validate(const std::vector<ITensorInfo *> &input
    // Output auto initialization if not yet initialized
TensorInfo tmp_output_info = *output->clone();
TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_width_concatenate_shape(inputs_vector);
- auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type(), inputs_vector[0]->fixed_point_position());
+ auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type());
unsigned int width_offset = 0;
for(const auto &input : inputs_vector)
@@ -73,7 +73,7 @@ void CLWidthConcatenateLayer::configure(std::vector<ICLTensor *> inputs_vector,
TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_width_concatenate_shape(inputs_vector);
    // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type());
ARM_COMPUTE_ERROR_THROW_ON(CLWidthConcatenateLayer::validate(inputs_vector_info, output->info()));
unsigned int width_offset = 0;
diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
index 67b2ae9d61..5cfd72f724 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
@@ -149,7 +149,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig
shape_im2col.set(2, 1);
// FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
- TensorInfo im2col_reshaped_info(shape_im2col, 1, dt, input->info()->fixed_point_position());
+ TensorInfo im2col_reshaped_info(shape_im2col, 1, dt);
_input_im2col_reshaped.allocator()->init(im2col_reshaped_info);
_memory_group.manage(&_input_im2col_reshaped);
@@ -160,7 +160,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig
const DataType gemm_data_type = dt;
// FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
- TensorInfo info_gemm(shape_gemm, 1, gemm_data_type, input->info()->fixed_point_position());
+ TensorInfo info_gemm(shape_gemm, 1, gemm_data_type);
_gemm_output.allocator()->init(info_gemm);
_memory_group.manage(&_gemm_output);
diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
index 1748a5952b..0c8769b38f 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
@@ -42,11 +42,11 @@ void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output, float
ARM_COMPUTE_ERROR_ON(beta != 1.0f);
// Create intermediate tensors shapes
- _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()));
+ _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), input->info()->num_channels(), input->info()->data_type()));
TensorShape shape = input->info()->tensor_shape();
shape.set(0, 1);
- TensorInfo tensor_info_max_sum(shape, input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position());
+ TensorInfo tensor_info_max_sum(shape, input->info()->num_channels(), input->info()->data_type());
_max.allocator()->init(tensor_info_max_sum);
_sum.allocator()->init(tensor_info_max_sum);
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index 8051d6da0e..fda9f57499 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -63,18 +63,15 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
info.pad().first, info.pad().second, inner_border_right, inner_border_top, stride_x, stride_y);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, bias);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights, bias);
if(bias != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, bias);
}
if(output->tensor_shape().total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
const TensorShape output_shape = deconvolution_output_shape(out_dims, input->tensor_shape(), weights->tensor_shape());
@@ -117,8 +114,7 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
// configure scale function
    // Init and allocate intermediate tensor for output, same size as input but the first two axes are the same as the output tensor
- const TensorInfo scale_out_info(compute_deconvolution_shape(*input->info(), stride_x, stride_y, inner_border_right, inner_border_top, info), 1, input->info()->data_type(),
- input->info()->fixed_point_position());
+ const TensorInfo scale_out_info(compute_deconvolution_shape(*input->info(), stride_x, stride_y, inner_border_right, inner_border_top, info), 1, input->info()->data_type());
_scaled_output.allocator()->init(scale_out_info);
// setup the function to convolve the upscaled output
diff --git a/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp b/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp
index 930f8d5a26..3d47ec2ac2 100644
--- a/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp
@@ -52,7 +52,7 @@ void NEDepthConcatenateLayer::configure(std::vector<ITensor *> inputs_vector, IT
TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
    // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type());
unsigned int depth_offset = 0;
for(unsigned int i = 0; i < _num_inputs; ++i)
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index 83c3e217f3..1d65dde2a6 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -88,7 +88,7 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we
}
else
{
- // Allocate the intermediate accumulator tensor in case of fixed point input
+ // Allocate the intermediate accumulator tensor in case of quantized input
if(_is_quantized)
{
_accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, DataType::S32));
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index 445864c2a9..40e40c8ffa 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -34,7 +34,7 @@
using namespace arm_compute;
NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _activationlayer_function(), _accumulator(), _has_bias(false), _is_fixed_point(false),
+ : _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _activationlayer_function(), _accumulator(), _has_bias(false),
_is_activationlayer_enabled(false), _dim_split(Window::DimZ)
{
}
@@ -54,26 +54,10 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights,
// Check if bias should be added in the convolution result
_has_bias = (bias != nullptr);
- // Allocate the intermediate accumulator tensor in case of fixed point input
- _is_fixed_point = is_data_type_fixed_point(input->info()->data_type());
- if(_is_fixed_point)
+ _conv_kernel.configure(input, weights, output, conv_info);
+ if(_has_bias)
{
- const DataType promoted_dt = (input->info()->data_type() == DataType::QS8) ? DataType::QS16 : DataType::QS32;
- _accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, promoted_dt, output->info()->fixed_point_position()));
- _memory_group.manage(&_accumulator);
- _conv_kernel.configure(input, weights, &_accumulator, conv_info);
-
- // When no bias is provided, we need to downscale the accumulator tensor
- _output_stage_kernel.configure(&_accumulator, bias, output);
- _accumulator.allocator()->allocate();
- }
- else
- {
- _conv_kernel.configure(input, weights, output, conv_info);
- if(_has_bias)
- {
- _output_stage_kernel.configure(output, bias);
- }
+ _output_stage_kernel.configure(output, bias);
}
// Add zero padding XY
@@ -92,12 +76,7 @@ Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITenso
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- DataType data_type = output->data_type();
- if(is_data_type_fixed_point(data_type))
- {
- // Promote data type in case of fixed point
- data_type = ((data_type == DataType::QS8) ? DataType::QS16 : DataType::QS32);
- }
+ DataType data_type = output->data_type();
TensorInfo accumulator(output->clone()->set_is_resizable(true).reset_padding().set_data_type(data_type));
// Validate Convolution kernel
@@ -129,7 +108,7 @@ void NEDirectConvolutionLayer::run()
_memory_group.acquire();
NEScheduler::get().schedule(&_conv_kernel, _dim_split);
- if(_has_bias || _is_fixed_point)
+ if(_has_bias)
{
NEScheduler::get().schedule(&_output_stage_kernel, Window::DimY);
}
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index 5b9f182bcb..3126823e9c 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -83,7 +83,7 @@ void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITenso
Status NEFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, const ITensorInfo *output, bool transpose_weights, bool is_batched_fc_layer)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(!transpose_weights && !is_batched_fc_layer, "Configuration transpose_weights=false & is_batched_fc_layer=false not supported");
@@ -233,9 +233,8 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh
Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, bool transpose_weights, bool are_weights_reshaped)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, weights, output);
const int num_batch_dimensions = std::max(0, static_cast<int>(output->tensor_shape().num_dimensions()) - 1);
const int num_input_dimensions = input->tensor_shape().num_dimensions() - num_batch_dimensions;
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index a98309d304..795ffc5d1c 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -46,7 +46,7 @@ NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager)
void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32, DataType::F16, DataType::QS8, DataType::QS16);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32, DataType::F16);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, d);
ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(0) != b->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
ARM_COMPUTE_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
@@ -54,7 +54,7 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
if(c != nullptr)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(c, 1, DataType::F32, DataType::F16, DataType::QS8, DataType::QS16);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(c, 1, DataType::F32, DataType::F16);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, c);
ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(1) != c->info()->dimension(1), "The C matrix must have the same number of rows as the matrix A");
ARM_COMPUTE_ERROR_ON_MSG(b->info()->dimension(0) != c->info()->dimension(0), "The C matrix must have the same number of columns as the matrix B");
@@ -103,8 +103,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
shape_tmp_b.set(0, b->info()->dimension(1) * transpose_w);
shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / static_cast<float>(transpose_w)));
- TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type(), a->info()->fixed_point_position());
- TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), a->info()->fixed_point_position());
+ TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type());
+ TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type());
_tmp_a.allocator()->init(info_a);
_tmp_b.allocator()->init(info_b);
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index d4400b8864..94ef4e7b32 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -90,12 +90,11 @@ void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const I
Status NEConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, bool transpose1xW)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
if(!is_data_type_quantized_asymmetric(weights->data_type()))
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output);
}
// Check if bias are present, if yes they will be embedded to the weights matrix
const bool append_bias = (biases != nullptr);
@@ -104,7 +103,6 @@ Status NEConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, co
{
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(weights->data_type()));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(weights, biases);
ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(3));
ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
}
@@ -173,9 +171,8 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf
unsigned int &mat_weights_cols, unsigned int &mat_weights_rows,
unsigned int &conv_w, unsigned int &conv_h, const Size2D &dilation)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
DataLayout data_layout = input->data_layout();
@@ -201,7 +198,6 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
}
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
ARM_COMPUTE_RETURN_ERROR_ON(!weights_info.are_reshaped() && biases->dimension(0) != weights->dimension(3));
ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
}
@@ -287,10 +283,9 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
ARM_COMPUTE_ERROR_THROW_ON(status);
- _is_prepared = false;
- _original_weights = weights;
- const unsigned int fixed_point_position = input->info()->fixed_point_position();
- const ITensor *biases_to_use = (_append_bias) ? biases : nullptr;
+ _is_prepared = false;
+ _original_weights = weights;
+ const ITensor *biases_to_use = (_append_bias) ? biases : nullptr;
bool run_optimised = dt == DataType::F32;
@@ -300,7 +295,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
TensorShape reshaped_weights_shape{ mat_weights_cols, mat_weights_rows };
// Create tensor to store the reshaped weights
- _weights_reshaped.allocator()->init(TensorInfo(reshaped_weights_shape, 1, dt, fixed_point_position));
+ _weights_reshaped.allocator()->init(TensorInfo(reshaped_weights_shape, 1, dt));
_reshape_weights.configure(weights, biases, &_weights_reshaped, false /* 1xW transpose */);
weights = &_weights_reshaped;
}
@@ -336,7 +331,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
}
// Create tensor to store the reshaped weights
- _weights_reshaped.allocator()->init(TensorInfo(reshaped_weights_shape, 1, dt, fixed_point_position));
+ _weights_reshaped.allocator()->init(TensorInfo(reshaped_weights_shape, 1, dt));
_reshape_weights.configure(weights, biases_to_use, &_weights_reshaped, _is_interleaved /* 1xW transpose */);
weights = &_weights_reshaped;
}
@@ -372,7 +367,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
shape_gemm.set(1, mat_input_rows);
const DataType gemm_data_type = _is_quantized ? DataType::S32 : dt;
// GEMM output should be S32 for acquiring raw integer accumulator without quantized postprocessing for quantized asymmetric input.
- TensorInfo info_gemm(shape_gemm, 1, gemm_data_type, input->info()->fixed_point_position());
+ TensorInfo info_gemm(shape_gemm, 1, gemm_data_type);
info_gemm.set_quantization_info(output->info()->quantization_info());
_gemm_output.allocator()->init(info_gemm);
_memory_group.manage(&_gemm_output);
diff --git a/src/runtime/NEON/functions/NENormalizationLayer.cpp b/src/runtime/NEON/functions/NENormalizationLayer.cpp
index af98ac1f17..f00114f930 100644
--- a/src/runtime/NEON/functions/NENormalizationLayer.cpp
+++ b/src/runtime/NEON/functions/NENormalizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@ void NENormalizationLayer::configure(const ITensor *input, ITensor *output, cons
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- TensorInfo tensor_info(input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
+ TensorInfo tensor_info(input->info()->tensor_shape(), 1, input->info()->data_type());
_input_squared.allocator()->init(tensor_info);
// Manage intermediate buffers
diff --git a/tests/CL/CLAccessor.h b/tests/CL/CLAccessor.h
index 66f3df06af..8e800c71c1 100644
--- a/tests/CL/CLAccessor.h
+++ b/tests/CL/CLAccessor.h
@@ -77,7 +77,6 @@ public:
int num_channels() const override;
int num_elements() const override;
PaddingSize padding() const override;
- int fixed_point_position() const override;
QuantizationInfo quantization_info() const override;
const void *operator()(const Coordinates &coord) const override;
void *operator()(const Coordinates &coord) override;
@@ -142,11 +141,6 @@ inline PaddingSize CLAccessor::padding() const
return _tensor.info()->padding();
}
-inline int CLAccessor::fixed_point_position() const
-{
- return _tensor.info()->fixed_point_position();
-}
-
inline QuantizationInfo CLAccessor::quantization_info() const
{
return _tensor.info()->quantization_info();
diff --git a/tests/GLES_COMPUTE/GCAccessor.h b/tests/GLES_COMPUTE/GCAccessor.h
index df0fbaa72e..6b722b4650 100644
--- a/tests/GLES_COMPUTE/GCAccessor.h
+++ b/tests/GLES_COMPUTE/GCAccessor.h
@@ -65,7 +65,6 @@ public:
int num_channels() const override;
int num_elements() const override;
PaddingSize padding() const override;
- int fixed_point_position() const override;
QuantizationInfo quantization_info() const override;
const void *operator()(const Coordinates &coord) const override;
void *operator()(const Coordinates &coord) override;
@@ -130,11 +129,6 @@ inline PaddingSize GCAccessor::padding() const
return _tensor.info()->padding();
}
-inline int GCAccessor::fixed_point_position() const
-{
- return _tensor.info()->fixed_point_position();
-}
-
inline QuantizationInfo GCAccessor::quantization_info() const
{
return _tensor.info()->quantization_info();
diff --git a/tests/IAccessor.h b/tests/IAccessor.h
index 609eafec13..fac526c460 100644
--- a/tests/IAccessor.h
+++ b/tests/IAccessor.h
@@ -94,12 +94,6 @@ public:
*/
virtual PaddingSize padding() const = 0;
- /** Number of bits for the fractional part.
- *
- * @return the number of bits for the fractional part.
- */
- virtual int fixed_point_position() const = 0;
-
/** Quantization info in case of asymmetric quantized type
*
* @return
diff --git a/tests/NEON/Accessor.h b/tests/NEON/Accessor.h
index 60a94c20d4..ceb4c473ac 100644
--- a/tests/NEON/Accessor.h
+++ b/tests/NEON/Accessor.h
@@ -70,7 +70,6 @@ public:
int num_channels() const override;
int num_elements() const override;
PaddingSize padding() const override;
- int fixed_point_position() const override;
QuantizationInfo quantization_info() const override;
const void *operator()(const Coordinates &coord) const override;
void *operator()(const Coordinates &coord) override;
@@ -129,11 +128,6 @@ inline PaddingSize Accessor::padding() const
return _tensor.info()->padding();
}
-inline int Accessor::fixed_point_position() const
-{
- return _tensor.info()->fixed_point_position();
-}
-
inline QuantizationInfo Accessor::quantization_info() const
{
return _tensor.info()->quantization_info();
diff --git a/tests/SimpleTensor.h b/tests/SimpleTensor.h
index 759a869696..335ef9130a 100644
--- a/tests/SimpleTensor.h
+++ b/tests/SimpleTensor.h
@@ -168,11 +168,6 @@ public:
* @return the available padding around the tensor.
*/
PaddingSize padding() const override;
- /** Number of bits for the fractional part.
- *
- * @return the number of bits for the fractional part.
- */
- int fixed_point_position() const override;
/** Quantization info in case of asymmetric quantized type
*
* @return
@@ -296,12 +291,6 @@ size_t SimpleTensor<T>::element_size() const
}
template <typename T>
-int SimpleTensor<T>::fixed_point_position() const
-{
- return 0;
-}
-
-template <typename T>
QuantizationInfo SimpleTensor<T>::quantization_info() const
{
return _quantization_info;
diff --git a/tests/Types.h b/tests/Types.h
index c65b56c1ba..407de8de1e 100644
--- a/tests/Types.h
+++ b/tests/Types.h
@@ -30,18 +30,6 @@
namespace arm_compute
{
-/** Fixed point operation */
-enum class FixedPointOp
-{
- ADD, /**< Addition */
- SUB, /**< Subtraction */
- MUL, /**< Multiplication */
- EXP, /**< Exponential */
- LOG, /**< Logarithm */
- INV_SQRT, /**< Inverse square root */
- RECIPROCAL /**< Reciprocal */
-};
-
/** Gradient dimension type. */
enum class GradientDimension
{
diff --git a/tests/Utils.h b/tests/Utils.h
index 7d064bdf48..111d80fdfe 100644
--- a/tests/Utils.h
+++ b/tests/Utils.h
@@ -26,7 +26,6 @@
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/HOGInfo.h"
#include "arm_compute/core/PyramidInfo.h"
#include "arm_compute/core/Size2D.h"
diff --git a/tests/benchmark/NEON/DirectConvolutionLayer.cpp b/tests/benchmark/NEON/DirectConvolutionLayer.cpp
index 22f1bd2942..f94ef6be19 100644
--- a/tests/benchmark/NEON/DirectConvolutionLayer.cpp
+++ b/tests/benchmark/NEON/DirectConvolutionLayer.cpp
@@ -47,7 +47,7 @@ namespace benchmark
{
namespace
{
-// Special data types for networks that need 5x5 direct convolution, which does not support Fixed Point
+// Special data types for networks that need 5x5 direct convolution
#ifdef ARM_COMPUTE_ENABLE_F16
const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
#else /* ARM_COMPUTE_ENABLE_F16 */
diff --git a/tests/validation/CL/ActivationLayer.cpp b/tests/validation/CL/ActivationLayer.cpp
index 4f97d7b6c1..45b23edd27 100644
--- a/tests/validation/CL/ActivationLayer.cpp
+++ b/tests/validation/CL/ActivationLayer.cpp
@@ -61,35 +61,14 @@ AbsoluteTolerance<float> tolerance(ActivationLayerInfo::ActivationFunction activ
case ActivationLayerInfo::ActivationFunction::SQUARE:
return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.1f : epsilon);
case ActivationLayerInfo::ActivationFunction::LOGISTIC:
- if(is_data_type_fixed_point(data_type))
- {
- return AbsoluteTolerance<float>(5.f);
- }
- else
- {
- return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : epsilon);
- }
+ return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : epsilon);
case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.00001f : epsilon);
case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
case ActivationLayerInfo::ActivationFunction::SQRT:
- if(is_data_type_fixed_point(data_type))
- {
- return AbsoluteTolerance<float>(5.f);
- }
- else
- {
- return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.01f : 0.00001f);
- }
+ return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.01f : 0.00001f);
case ActivationLayerInfo::ActivationFunction::TANH:
- if(is_data_type_fixed_point(data_type))
- {
- return AbsoluteTolerance<float>(5.f);
- }
- else
- {
- return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : 0.00001f);
- }
+ return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : 0.00001f);
default:
return AbsoluteTolerance<float>(epsilon);
}
diff --git a/tests/validation/CL/ArithmeticDivision.cpp b/tests/validation/CL/ArithmeticDivision.cpp
index 42e2d223c2..5d4fa1fd5e 100644
--- a/tests/validation/CL/ArithmeticDivision.cpp
+++ b/tests/validation/CL/ArithmeticDivision.cpp
@@ -57,19 +57,19 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Window shrink
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 2),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 2),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 2),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
})),
framework::dataset::make("Expected", { false, false, false, false, true })),
input1_info, input2_info, output_info, expected)
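[Editor's note, not part of the patch] The validation-suite hunks from here on are one mechanical substitution: the trailing fixed-point-position argument is dropped from every TensorInfo constructor call in the test datasets. A standalone before/after sketch of that call-site change (illustrative only; the shape and data type are taken from the hunk above):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"

int main()
{
    using namespace arm_compute;
    // Old test code passed a fixed-point position as a fourth argument, e.g.
    //   TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 2)
    // After this patch the tests construct the same descriptor without it:
    TensorInfo info(TensorShape(32U, 13U, 2U), 1, DataType::F32);
    return info.data_type() == DataType::F32 ? 0 : 1;
}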
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index 30dd8502ca..4ea2eb81a5 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -71,32 +71,32 @@ TEST_SUITE(CL)
TEST_SUITE(ConvolutionLayer)
DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(23U, 27U, 31U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(17U, 31U, 32U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32, 0)
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(23U, 27U, 31U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(17U, 31U, 32U), 1, DataType::F32),
+ TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32)
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 31U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(5U, 5U, 32U, 19U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32, 0)
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 31U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16),
+ TensorInfo(TensorShape(5U, 5U, 32U, 19U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32)
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F32, 0)
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F32),
+ TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F32)
})),
framework::dataset::make("ConvInfo", { PadStrideInfo(1, 2, 1, 1),
PadStrideInfo(1, 2, 1, 1),
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
index 269bf1587b..0fd7ed4ddc 100644
--- a/tests/validation/CL/DeconvolutionLayer.cpp
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -103,33 +103,33 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, (combine(datasets::Sm
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid weights shape
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8, 4), // Non supported data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 11), // Invalid bias shape
- TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32, 0), // Window shrink
- TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32, 0),
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights shape
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), // Non supported data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid bias shape
+ TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32), // Window shrink
+ TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::QASYMM8, 5),
- TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32, 11),
- TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32, 0),
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32),
})),
- framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(1U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(1U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(1U), 1, DataType::F32, 5),
- TensorInfo(TensorShape(25U, 11U), 1, DataType::F32, 11),
- TensorInfo(TensorShape(1U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+ framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(1U), 1, DataType::F16),
+ TensorInfo(TensorShape(1U), 1, DataType::F32),
+ TensorInfo(TensorShape(1U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(1U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 5),
- TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32, 0),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32),
})),
framework::dataset::make("PadStrideInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index 5b18f5953b..fad8140848 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -56,57 +56,57 @@ TEST_SUITE(DepthwiseConvolutionLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32, 0), // Mismatching data type input/weights
- TensorInfo(TensorShape(32U, 18U, 3U), 1, DataType::F32, 0), // Mismatching input feature maps
- TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32, 0), // Unsupported weights dimensions
- TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::QASYMM8, 0), // Unsupported activation
- TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32, 0), // Mismatching depth multiplier
- TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32, 0), // Invalid stride
- TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32, 0), // Invalid biases size
- TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32, 0), // Invalid biases dimensions
- TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32, 0), // Invalid output size
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Window shrink
- TensorInfo(TensorShape(32U, 18U, 8U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(50U, 32U, 8U), 1, DataType::QASYMM8, 0),
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Mismatching data type input/weights
+ TensorInfo(TensorShape(32U, 18U, 3U), 1, DataType::F32), // Mismatching input feature maps
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Unsupported weights dimensions
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::QASYMM8), // Unsupported activation
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Mismatching depth multiplier
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Invalid stride
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Invalid biases size
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Invalid biases dimensions
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Invalid output size
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
+ TensorInfo(TensorShape(32U, 18U, 8U), 1, DataType::F32),
+ TensorInfo(TensorShape(50U, 32U, 8U), 1, DataType::QASYMM8),
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(5U, 5U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::QASYMM8, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8, 0),
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 5U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8),
})),
- framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U), 1, DataType::S32, 0),
- TensorInfo(TensorShape(2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(24U), 1, DataType::S32, 0),
+ framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::S32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(24U), 1, DataType::S32),
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::QASYMM8, 0),
- TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(30U, 16U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(48U, 30U, 24U), 1, DataType::QASYMM8, 0),
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(48U, 30U, 24U), 1, DataType::QASYMM8),
})),
framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
@@ -155,41 +155,41 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip
}
DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type input/weights
- TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32, 0), // Mismatching input feature maps
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching depth multiplier
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid biases size
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid biases dimensions
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid output size
- TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::QASYMM8, 0),
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights
+ TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32), // Mismatching input feature maps
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching depth multiplier
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size
+ TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::QASYMM8),
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8, 0),
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8),
})),
- framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(24U), 1, DataType::S32, 0),
+ framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(24U), 1, DataType::S32),
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::QASYMM8, 0),
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::QASYMM8),
})),
framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
diff --git a/tests/validation/CL/DilatedConvolutionLayer.cpp b/tests/validation/CL/DilatedConvolutionLayer.cpp
index fdd6cc812a..f748f905d1 100644
--- a/tests/validation/CL/DilatedConvolutionLayer.cpp
+++ b/tests/validation/CL/DilatedConvolutionLayer.cpp
@@ -61,23 +61,23 @@ TEST_SUITE(CL)
TEST_SUITE(DilatedConvolutionLayer)
DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(23U, 27U, 23U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32, 0)
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(23U, 27U, 23U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 23U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16, 0)
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 23U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32, 0)
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
})),
framework::dataset::make("ConvInfo", { PadStrideInfo(1, 2, 1, 1),
PadStrideInfo(1, 2, 1, 1),
diff --git a/tests/validation/CL/DirectConvolutionLayer.cpp b/tests/validation/CL/DirectConvolutionLayer.cpp
index a796b6e4da..87f9449359 100644
--- a/tests/validation/CL/DirectConvolutionLayer.cpp
+++ b/tests/validation/CL/DirectConvolutionLayer.cpp
@@ -61,16 +61,7 @@ const auto data = combine(datasets::SmallDirectConvolutionShapes(),
combine(framework::dataset::make("PadY", 0, 2),
framework::dataset::make("KernelSize", { 3, 5 })))),
framework::dataset::make("NumKernels", { 1, 4, 8, 16 })))));
-const auto data_fixed_point = combine(datasets::TinyDirectConvolutionShapes(),
- combine(framework::dataset::make("StrideX", 1, 3),
- combine(framework::dataset::make("StrideY", 1, 3),
- combine(concat(combine(framework::dataset::make("PadX", 0),
- combine(framework::dataset::make("PadY", 0),
- framework::dataset::make("KernelSize", 1))),
- combine(framework::dataset::make("PadX", 0, 2),
- combine(framework::dataset::make("PadY", 0, 2),
- framework::dataset::make("KernelSize", { 3 })))),
- framework::dataset::make("NumKernels", { 1, 4, 8, 16 })))));
+
/** Activation function Dataset*/
const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
{
@@ -89,53 +80,53 @@ TEST_SUITE(DirectConvolutionLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type input/weights
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching input feature maps
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Unsupported kernel width
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Non-rectangular weights dimensions
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid weights dimensions
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid stride
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid biases size
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid biases dimensions
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid output size
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Window shrink
- TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32, 0),
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching input feature maps
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported kernel width
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Non-rectangular weights dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid stride
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
+ TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
}),
- framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(9U, 9U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U, 3U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32, 0),
+ framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(9U, 9U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U, 3U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32),
})),
- framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+ framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(26U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32, 0),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(26U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32),
})),
framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
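[Editor's note, not part of the patch] The data_fixed_point block removed above was a second test grid built with the same dataset combinators as the surviving data grid: framework::dataset::make() builds a named dataset from an explicit value list or a numeric range, combine() takes the cartesian product of two datasets, and concat() appends one dataset to another. A small sketch of how those combinators compose (the umbrella include path is an assumption, and the range bounds are illustrative rather than the exact values used above):

#include "tests/framework/datasets/Datasets.h"

using namespace arm_compute::test;

// Named datasets: an explicit list of kernel sizes and a small numeric range of pads.
const auto kernel_sizes = framework::dataset::make("KernelSize", { 3, 5 });
const auto pads_x       = framework::dataset::make("PadX", 0, 2);
// Cartesian product: every PadX value paired with every KernelSize value.
const auto pad_kernel_grid = framework::dataset::combine(pads_x, kernel_sizes);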
diff --git a/tests/validation/CL/LSTMLayer.cpp b/tests/validation/CL/LSTMLayer.cpp
index bd43678844..e1d4cbec49 100644
--- a/tests/validation/CL/LSTMLayer.cpp
+++ b/tests/validation/CL/LSTMLayer.cpp
@@ -49,77 +49,77 @@ TEST_SUITE(LSTMLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(8U, 2U), 1, DataType::U8, 0), // Wrong data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Wrong input size
- TensorInfo(TensorShape(8U, 2U), 1, DataType::F32, 0), // Wrong input weights size
- TensorInfo(TensorShape(8U, 2U), 1, DataType::F32, 0), // Wrong recurrent weights size
- TensorInfo(TensorShape(8U, 2U), 1, DataType::F32, 0), // Wrong cell bias size
- TensorInfo(TensorShape(8U, 2U), 1, DataType::F32, 0), // Wrong cell state size
- TensorInfo(TensorShape(8U, 2U), 1, DataType::F32, 0), // Wrong output size
- TensorInfo(TensorShape(8U, 2U), 1, DataType::F32, 0), // Wrong scratch size
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(8U, 2U), 1, DataType::U8), // Wrong data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Wrong input size
+ TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong input weights size
+ TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong recurrent weights size
+ TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell bias size
+ TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell state size
+ TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong output size
+ TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong scratch size
}),
- framework::dataset::make("InputWeightsInfo", { TensorInfo(TensorShape(8U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(8U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(8U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(8U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(8U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(8U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(8U, 16U), 1, DataType::F32, 0),
+ framework::dataset::make("InputWeightsInfo", { TensorInfo(TensorShape(8U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(8U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(8U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(8U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(8U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(8U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(8U, 16U), 1, DataType::F32),
})),
- framework::dataset::make("RecurrentWeightsInfo", { TensorInfo(TensorShape(16U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 16U), 1, DataType::F32, 0),
+ framework::dataset::make("RecurrentWeightsInfo", { TensorInfo(TensorShape(16U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 16U), 1, DataType::F32),
})),
- framework::dataset::make("CellBiasInfo", { TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(30U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
+ framework::dataset::make("CellBiasInfo", { TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
})),
- framework::dataset::make("ProjectionBiasInfo", { TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U), 1, DataType::F32, 0),
+ framework::dataset::make("ProjectionBiasInfo", { TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
})),
- framework::dataset::make("CellStateInfo", { TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
+ framework::dataset::make("CellStateInfo", { TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 2U), 1, DataType::F32, 0),
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("ScratchInfo", { TensorInfo(TensorShape(64U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(64U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(64U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(64U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(64U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(64U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(64U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(12U, 2U), 1, DataType::F32, 0),
+ framework::dataset::make("ScratchInfo", { TensorInfo(TensorShape(64U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(64U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(64U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(64U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(64U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(64U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(64U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(12U, 2U), 1, DataType::F32),
})),
framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
diff --git a/tests/validation/CL/LocallyConnected.cpp b/tests/validation/CL/LocallyConnected.cpp
index d8f236cb12..5381072131 100644
--- a/tests/validation/CL/LocallyConnected.cpp
+++ b/tests/validation/CL/LocallyConnected.cpp
@@ -52,41 +52,41 @@ TEST_SUITE(LocallyConnected)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching data type input/weights
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching data type input/bias
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching data type input/output
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching shape input/weights
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching shape input/bias
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching shape input/output
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Asymmetric padding
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0)
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching data type input/weights
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching data type input/bias
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching data type input/output
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching shape input/weights
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching shape input/bias
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching shape input/output
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Asymmetric padding
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32)
}),
- framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 274U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32, 0)
+ framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 274U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32)
})),
- framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 275U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 274U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 275U), 1, DataType::F32, 0)
+ framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 275U), 1, DataType::F16),
+ TensorInfo(TensorShape(21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 274U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 275U), 1, DataType::F32)
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 22U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0)
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F16),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 22U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32)
})),
framework::dataset::make("PadStride", { PadStrideInfo(2, 1, 0, 0),
PadStrideInfo(2, 1, 0, 0),
diff --git a/tests/validation/CL/NormalizationLayer.cpp b/tests/validation/CL/NormalizationLayer.cpp
index a2dbaff272..e640e01079 100644
--- a/tests/validation/CL/NormalizationLayer.cpp
+++ b/tests/validation/CL/NormalizationLayer.cpp
@@ -71,19 +71,19 @@ TEST_SUITE(NormalizationLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type input/output
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching shapes
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Even normalization
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Non implemented IN_MAP_2D
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Window shrink
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 0),
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/output
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Even normalization
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Non implemented IN_MAP_2D
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
}),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 0),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
})),
framework::dataset::make("NormInfo", { NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index 0b8a11fe5d..133152219f 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -65,23 +65,23 @@ TEST_SUITE(PoolingLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Window shrink
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid pad/size combination
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid pad/size combination
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8, 0), // Invalid parameters
- TensorInfo(TensorShape(15U, 13U, 5U), 1, DataType::F32, 0), // Non-rectangular Global Pooling
- TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0), // Invalid output Global Pooling
- TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0),
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid pad/size combination
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid pad/size combination
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), // Invalid parameters
+ TensorInfo(TensorShape(15U, 13U, 5U), 1, DataType::F32), // Non-rectangular Global Pooling
+ TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32), // Invalid output Global Pooling
+ TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32),
}),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8, 0),
- TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32, 0),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32),
+ TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32),
})),
framework::dataset::make("PoolInfo", { PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
diff --git a/tests/validation/CL/RNNLayer.cpp b/tests/validation/CL/RNNLayer.cpp
index 0af6f8ea00..9179c0955c 100644
--- a/tests/validation/CL/RNNLayer.cpp
+++ b/tests/validation/CL/RNNLayer.cpp
@@ -49,53 +49,53 @@ TEST_SUITE(RNNLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::U8, 0), // Wrong data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Wrong input size
- TensorInfo(TensorShape(27U, 13U), 1, DataType::F32, 0), // Wrong weights size
- TensorInfo(TensorShape(27U, 13U), 1, DataType::F32, 0), // Wrong recurrent weights size
- TensorInfo(TensorShape(27U, 13U), 1, DataType::F32, 0), // Wrong bias size
- TensorInfo(TensorShape(27U, 13U), 1, DataType::F32, 0), // Wrong output size
- TensorInfo(TensorShape(27U, 13U), 1, DataType::F32, 0), // Wrong hidden output size
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::U8), // Wrong data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Wrong input size
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Wrong weights size
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Wrong recurrent weights size
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Wrong bias size
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Wrong output size
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Wrong hidden output size
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(27U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U), 1, DataType::F32, 0),
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
})),
- framework::dataset::make("RecurrentWeightsInfo", { TensorInfo(TensorShape(11U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 11U), 1, DataType::F32, 0),
+ framework::dataset::make("RecurrentWeightsInfo", { TensorInfo(TensorShape(11U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 11U), 1, DataType::F32),
})),
- framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(30U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
+ framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
})),
- framework::dataset::make("HiddenStateInfo", { TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U, 2U), 1, DataType::F32, 0),
+ framework::dataset::make("HiddenStateInfo", { TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U, 2U), 1, DataType::F32),
})),
framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
diff --git a/tests/validation/CL/WidthConcatenateLayer.cpp b/tests/validation/CL/WidthConcatenateLayer.cpp
index 36a5e6fcfb..6af3c64f73 100644
--- a/tests/validation/CL/WidthConcatenateLayer.cpp
+++ b/tests/validation/CL/WidthConcatenateLayer.cpp
@@ -44,20 +44,20 @@ TEST_SUITE(WidthConcatenateLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("InputInfo1", { TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching data type input/output
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching y dimension
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching total width
- TensorInfo(TensorShape(16U, 27U, 5U), 1, DataType::F32, 0)
+ framework::dataset::make("InputInfo1", { TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching data type input/output
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching y dimension
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching total width
+ TensorInfo(TensorShape(16U, 27U, 5U), 1, DataType::F32)
}),
- framework::dataset::make("InputInfo2", { TensorInfo(TensorShape(24U, 27U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(52U, 27U, 5U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(52U, 27U, 5U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(16U, 27U, 5U), 1, DataType::F32, 0)
+ framework::dataset::make("InputInfo2", { TensorInfo(TensorShape(24U, 27U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(52U, 27U, 5U), 1, DataType::F32),
+ TensorInfo(TensorShape(52U, 27U, 5U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 27U, 5U), 1, DataType::F32)
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(47U, 27U, 5U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(75U, 12U, 5U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 27U, 5U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 27U, 5U), 1, DataType::F32, 0)
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(47U, 27U, 5U), 1, DataType::F16),
+ TensorInfo(TensorShape(75U, 12U, 5U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 27U, 5U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 27U, 5U), 1, DataType::F32)
})),
framework::dataset::make("Expected", { false, false, false, true })),
input_info1, input_info2, output_info,expected)
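Note: the change applied across these validation hunks is uniform: the trailing integer argument (the former fixed-point position) is dropped from every TensorInfo constructed in the test datasets. A minimal sketch of the old and new call forms, assuming only the public TensorInfo(TensorShape, num_channels, DataType) constructor (illustrative only, not part of this patch):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    // Before this patch the datasets passed a fourth argument, the fixed-point
    // position, which was only meaningful for the removed QS8/QS16 types:
    //   TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0);
    // After the patch the same entry is constructed without it:
    const TensorInfo info(TensorShape(27U, 13U, 2U), 1, DataType::F32);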
diff --git a/tests/validation/FixedPoint.h b/tests/validation/FixedPoint.h
deleted file mode 100644
index 81c4f53724..0000000000
--- a/tests/validation/FixedPoint.h
+++ /dev/null
@@ -1,997 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_TEST_VALIDATION_FIXEDPOINT_H__
-#define __ARM_COMPUTE_TEST_VALIDATION_FIXEDPOINT_H__
-
-#include "support/ToolchainSupport.h"
-#include "tests/Utils.h"
-
-#include <cassert>
-#include <cstdint>
-#include <cstdlib>
-#include <limits>
-#include <string>
-#include <type_traits>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace fixed_point_arithmetic
-{
-namespace detail
-{
-// Forward declare structs
-struct functions;
-template <typename T>
-struct constant_expr;
-}
-
-/** Fixed point traits */
-namespace traits
-{
-// Promote types
-// *INDENT-OFF*
-// clang-format off
-/** Promote a type */
-template <typename T> struct promote { };
-/** Promote uint8_t to uint16_t */
-template <> struct promote<uint8_t> { using type = uint16_t; /**< Promoted type */ };
-/** Promote int8_t to int16_t */
-template <> struct promote<int8_t> { using type = int16_t; /**< Promoted type */ };
-/** Promote uint16_t to uint32_t */
-template <> struct promote<uint16_t> { using type = uint32_t; /**< Promoted type */ };
-/** Promote int16_t to int32_t */
-template <> struct promote<int16_t> { using type = int32_t; /**< Promoted type */ };
-/** Promote uint32_t to uint64_t */
-template <> struct promote<uint32_t> { using type = uint64_t; /**< Promoted type */ };
-/** Promote int32_t to int64_t */
-template <> struct promote<int32_t> { using type = int64_t; /**< Promoted type */ };
-/** Promote float to float */
-template <> struct promote<float> { using type = float; /**< Promoted type */ };
-/** Promote half to half */
-template <> struct promote<half> { using type = half; /**< Promoted type */ };
-
-/** Get promoted type */
-template <typename T>
-using promote_t = typename promote<T>::type;
-// clang-format on
-// *INDENT-ON*
-}
-
-/** Strongly typed enum class representing the overflow policy */
-enum class OverflowPolicy
-{
- WRAP, /**< Wrap policy */
- SATURATE /**< Saturate policy */
-};
-/** Strongly typed enum class representing the rounding policy */
-enum class RoundingPolicy
-{
- TO_ZERO, /**< Round to zero policy */
- TO_NEAREST_EVEN /**< Round to nearest even policy */
-};
-
-/** Arbitrary fixed-point arithmetic class */
-template <typename T>
-class fixed_point
-{
-public:
- // Static Checks
- static_assert(std::is_integral<T>::value, "Type is not an integer");
-
- /** Constructor (from different fixed point type)
- *
- * @param[in] val Fixed point
- * @param[in] p Fixed point precision
- */
- template <typename U>
- fixed_point(fixed_point<U> val, uint8_t p)
- : _value(0), _fixed_point_position(p)
- {
- assert(p > 0 && p < std::numeric_limits<T>::digits);
- T v = 0;
-
- if(std::numeric_limits<T>::digits < std::numeric_limits<U>::digits)
- {
- val.rescale(p);
- v = detail::constant_expr<T>::saturate_cast(val.raw());
- }
- else
- {
- auto v_cast = static_cast<fixed_point<T>>(val);
- v_cast.rescale(p);
- v = v_cast.raw();
- }
- _value = static_cast<T>(v);
- }
- /** Constructor (from integer)
- *
- * @param[in] val Integer value to be represented as fixed point
- * @param[in] p Fixed point precision
- * @param[in] is_raw If true, val is a raw fixed point value; otherwise it is an integer to be converted
- */
- template <typename U, typename = typename std::enable_if<std::is_integral<U>::value>::type>
- fixed_point(U val, uint8_t p, bool is_raw = false)
- : _value(val << p), _fixed_point_position(p)
- {
- if(is_raw)
- {
- _value = val;
- }
- }
- /** Constructor (from float)
- *
- * @param[in] val Float value to be represented as fixed point
- * @param[in] p Fixed point precision
- */
- fixed_point(float val, uint8_t p)
- : _value(detail::constant_expr<T>::to_fixed(val, p)), _fixed_point_position(p)
- {
- assert(p > 0 && p < std::numeric_limits<T>::digits);
- }
- /** Constructor (from float string)
- *
- * @param[in] str Float string to be represented as fixed point
- * @param[in] p Fixed point precision
- */
- fixed_point(std::string str, uint8_t p)
- : _value(detail::constant_expr<T>::to_fixed(support::cpp11::stof(str), p)), _fixed_point_position(p)
- {
- assert(p > 0 && p < std::numeric_limits<T>::digits);
- }
- /** Default copy assignment operator */
- fixed_point &operator=(const fixed_point &) = default;
- /** Default move assignment operator */
- fixed_point &operator=(fixed_point &&) = default;
- /** Default copy constructor */
- fixed_point(const fixed_point &) = default;
- /** Default move constructor */
- fixed_point(fixed_point &&) = default;
-
- /** Float conversion operator
- *
- * @return Float representation of fixed point
- */
- operator float() const
- {
- return detail::constant_expr<T>::to_float(_value, _fixed_point_position);
- }
- /** Integer conversion operator
- *
- * @return Integer representation of fixed point
- */
- template <typename U, typename = typename std::enable_if<std::is_integral<T>::value>::type>
- operator U() const
- {
- return detail::constant_expr<T>::to_int(_value, _fixed_point_position);
- }
- /** Convert to a fixed point of a different underlying type but the same precision
- *
- * @note Down-conversion might fail.
- */
- template <typename U>
- operator fixed_point<U>()
- {
- U val = static_cast<U>(_value);
- if(std::numeric_limits<U>::digits < std::numeric_limits<T>::digits)
- {
- val = detail::constant_expr<U>::saturate_cast(_value);
- }
- return fixed_point<U>(val, _fixed_point_position, true);
- }
-
- /** Arithmetic += assignment operator
- *
- * @param[in] rhs Fixed point operand
- *
- * @return Reference to this fixed point
- */
- template <typename U>
- fixed_point<T> &operator+=(const fixed_point<U> &rhs)
- {
- fixed_point<T> val(rhs, _fixed_point_position);
- _value += val.raw();
- return *this;
- }
- /** Arithmetic -= assignment operator
- *
- * @param[in] rhs Fixed point operand
- *
- * @return Reference to this fixed point
- */
- template <typename U>
- fixed_point<T> &operator-=(const fixed_point<U> &rhs)
- {
- fixed_point<T> val(rhs, _fixed_point_position);
- _value -= val.raw();
- return *this;
- }
-
- /** Raw value accessor
- *
- * @return Raw fixed point value
- */
- T raw() const
- {
- return _value;
- }
- /** Precision accessor
- *
- * @return Precision of fixed point
- */
- uint8_t precision() const
- {
- return _fixed_point_position;
- }
- /** Rescale a fixed point to a new precision
- *
- * @param[in] p New fixed point precision
- */
- void rescale(uint8_t p)
- {
- assert(p > 0 && p < std::numeric_limits<T>::digits);
-
- using promoted_T = typename traits::promote<T>::type;
- promoted_T val = _value;
- if(p > _fixed_point_position)
- {
- val <<= (p - _fixed_point_position);
- }
- else if(p < _fixed_point_position)
- {
- uint8_t pbar = _fixed_point_position - p;
- val += (pbar != 0) ? (1 << (pbar - 1)) : 0;
- val >>= pbar;
- }
-
- _value = detail::constant_expr<T>::saturate_cast(val);
- _fixed_point_position = p;
- }
-
-private:
- T _value; /**< Fixed point raw value */
- uint8_t _fixed_point_position; /**< Fixed point precision */
-};
-
-namespace detail
-{
-/** Count the number of leading zero bits in the given value.
- *
- * @param[in] value Input value.
- *
- * @return Number of leading zero bits.
- */
-template <typename T>
-constexpr int clz(T value)
-{
- using unsigned_T = typename std::make_unsigned<T>::type;
- // __builtin_clz is available for int. Need to correct reported number to
- // match the original type.
- return __builtin_clz(value) - (32 - std::numeric_limits<unsigned_T>::digits);
-}
-
-/** Constant expressions */
-template <typename T>
-struct constant_expr
-{
- /** Calculate representation of 1 in fixed point given a fixed point precision
- *
- * @param[in] p Fixed point precision
- *
- * @return Representation of value 1 in fixed point.
- */
- static constexpr T fixed_one(uint8_t p)
- {
- return (1 << p);
- }
- /** Calculate fixed point precision step given a fixed point precision
- *
- * @param[in] p Fixed point precision
- *
- * @return Fixed point precision step
- */
- static constexpr float fixed_step(uint8_t p)
- {
- return (1.0f / static_cast<float>(1 << p));
- }
-
- /** Convert a fixed point value to float given its precision.
- *
- * @param[in] val Fixed point value
- * @param[in] p Fixed point precision
- *
- * @return Float representation of the fixed point number
- */
- static constexpr float to_float(T val, uint8_t p)
- {
- return static_cast<float>(val * fixed_step(p));
- }
- /** Convert a fixed point value to integer given its precision.
- *
- * @param[in] val Fixed point value
- * @param[in] p Fixed point precision
- *
- * @return Integer of the fixed point number
- */
- static constexpr T to_int(T val, uint8_t p)
- {
- return val >> p;
- }
- /** Convert a single precision floating point value to a fixed point representation given its precision.
- *
- * @param[in] val Floating point value
- * @param[in] p Fixed point precision
- *
- * @return The raw fixed point representation
- */
- static constexpr T to_fixed(float val, uint8_t p)
- {
- return static_cast<T>(saturate_cast<float>(val * fixed_one(p) + ((val >= 0) ? 0.5 : -0.5)));
- }
- /** Clamp value between two ranges
- *
- * @param[in] val Value to clamp
- * @param[in] min Minimum value to clamp to
- * @param[in] max Maximum value to clamp to
- *
- * @return clamped value
- */
- static constexpr T clamp(T val, T min, T max)
- {
- return std::min(std::max(val, min), max);
- }
- /** Saturate given number
- *
- * @param[in] val Value to saturate
- *
- * @return Saturated value
- */
- template <typename U>
- static constexpr T saturate_cast(U val)
- {
- return static_cast<T>(std::min<U>(std::max<U>(val, static_cast<U>(std::numeric_limits<T>::min())), static_cast<U>(std::numeric_limits<T>::max())));
- }
-};
-/** Functions */
-struct functions
-{
- /** Output stream operator
- *
- * @param[in] s Output stream
- * @param[in] x Fixed point value
- *
- * @return Reference to the updated output stream
- */
- template <typename T, typename U, typename traits>
- static std::basic_ostream<T, traits> &write(std::basic_ostream<T, traits> &s, fixed_point<U> &x)
- {
- return s << static_cast<float>(x);
- }
- /** Signbit of a fixed point number.
- *
- * @param[in] x Fixed point number
- *
- * @return True if negative else false.
- */
- template <typename T>
- static bool signbit(fixed_point<T> x)
- {
- return ((x.raw() >> std::numeric_limits<T>::digits) != 0);
- }
- /** Checks if two fixed point numbers are equal
- *
- * @param[in] x First fixed point operand
- * @param[in] y Second fixed point operand
- *
- * @return True if fixed points are equal else false
- */
- template <typename T>
- static bool isequal(fixed_point<T> x, fixed_point<T> y)
- {
- uint8_t p = std::min(x.precision(), y.precision());
- x.rescale(p);
- y.rescale(p);
- return (x.raw() == y.raw());
- }
- /** Checks if two fixed point numbers are not equal
- *
- * @param[in] x First fixed point operand
- * @param[in] y Second fixed point operand
- *
- * @return True if fixed points are not equal else false
- */
- template <typename T>
- static bool isnotequal(fixed_point<T> x, fixed_point<T> y)
- {
- return !isequal(x, y);
- }
- /** Checks if one fixed point is greater than the other
- *
- * @param[in] x First fixed point operand
- * @param[in] y Second fixed point operand
- *
- * @return True if fixed point is greater than other
- */
- template <typename T>
- static bool isgreater(fixed_point<T> x, fixed_point<T> y)
- {
- uint8_t p = std::min(x.precision(), y.precision());
- x.rescale(p);
- y.rescale(p);
- return (x.raw() > y.raw());
- }
- /** Checks if one fixed point is greater than or equal to the other
- *
- * @param[in] x First fixed point operand
- * @param[in] y Second fixed point operand
- *
- * @return True if fixed point is greater than or equal to the other
- */
- template <typename T>
- static bool isgreaterequal(fixed_point<T> x, fixed_point<T> y)
- {
- uint8_t p = std::min(x.precision(), y.precision());
- x.rescale(p);
- y.rescale(p);
- return (x.raw() >= y.raw());
- }
- /** Checks if one fixed point is less than the other
- *
- * @param[in] x First fixed point operand
- * @param[in] y Second fixed point operand
- *
- * @return True if fixed point is less than other
- */
- template <typename T>
- static bool isless(fixed_point<T> x, fixed_point<T> y)
- {
- uint8_t p = std::min(x.precision(), y.precision());
- x.rescale(p);
- y.rescale(p);
- return (x.raw() < y.raw());
- }
- /** Checks if one fixed point is less than or equal to the other
- *
- * @param[in] x First fixed point operand
- * @param[in] y Second fixed point operand
- *
- * @return True if fixed point is less than or equal to the other
- */
- template <typename T>
- static bool islessequal(fixed_point<T> x, fixed_point<T> y)
- {
- uint8_t p = std::min(x.precision(), y.precision());
- x.rescale(p);
- y.rescale(p);
- return (x.raw() <= y.raw());
- }
- /** Checks if one fixed point is less or greater than the other
- *
- * @param[in] x First fixed point operand
- * @param[in] y Second fixed point operand
- *
- * @return True if fixed point is less or greater than other
- */
- template <typename T>
- static bool islessgreater(fixed_point<T> x, fixed_point<T> y)
- {
- return isnotequal(x, y);
- }
- /** Clamp fixed point to specific range.
- *
- * @param[in] x Fixed point operand
- * @param[in] min Minimum value to clamp to
- * @param[in] max Maximum value to clamp to
- *
- * @return Clamped result
- */
- template <typename T>
- static fixed_point<T> clamp(fixed_point<T> x, T min, T max)
- {
- return fixed_point<T>(constant_expr<T>::clamp(x.raw(), min, max), x.precision(), true);
- }
- /** Negate number
- *
- * @param[in] x Fixed point operand
- *
- * @return Negated fixed point result
- */
- template <OverflowPolicy OP = OverflowPolicy::SATURATE, typename T>
- static fixed_point<T> negate(fixed_point<T> x)
- {
- using promoted_T = typename traits::promote<T>::type;
- promoted_T val = -x.raw();
- if(OP == OverflowPolicy::SATURATE)
- {
- val = constant_expr<T>::saturate_cast(val);
- }
- return fixed_point<T>(static_cast<T>(val), x.precision(), true);
- }
- /** Perform addition among two fixed point numbers
- *
- * @param[in] x First fixed point operand
- * @param[in] y Second fixed point operand
- *
- * @return Result fixed point with precision equal to minimum precision of both operands
- */
- template <OverflowPolicy OP = OverflowPolicy::SATURATE, typename T>
- static fixed_point<T> add(fixed_point<T> x, fixed_point<T> y)
- {
- uint8_t p = std::min(x.precision(), y.precision());
- x.rescale(p);
- y.rescale(p);
- if(OP == OverflowPolicy::SATURATE)
- {
- using type = typename traits::promote<T>::type;
- type val = static_cast<type>(x.raw()) + static_cast<type>(y.raw());
- val = constant_expr<T>::saturate_cast(val);
- return fixed_point<T>(static_cast<T>(val), p, true);
- }
- else
- {
- return fixed_point<T>(x.raw() + y.raw(), p, true);
- }
- }
- /** Perform subtraction among two fixed point numbers
- *
- * @param[in] x First fixed point operand
- * @param[in] y Second fixed point operand
- *
- * @return Result fixed point with precision equal to minimum precision of both operands
- */
- template <OverflowPolicy OP = OverflowPolicy::SATURATE, typename T>
- static fixed_point<T> sub(fixed_point<T> x, fixed_point<T> y)
- {
- uint8_t p = std::min(x.precision(), y.precision());
- x.rescale(p);
- y.rescale(p);
- if(OP == OverflowPolicy::SATURATE)
- {
- using type = typename traits::promote<T>::type;
- type val = static_cast<type>(x.raw()) - static_cast<type>(y.raw());
- val = constant_expr<T>::saturate_cast(val);
- return fixed_point<T>(static_cast<T>(val), p, true);
- }
- else
- {
- return fixed_point<T>(x.raw() - y.raw(), p, true);
- }
- }
- /** Perform multiplication among two fixed point numbers
- *
- * @param[in] x First fixed point operand
- * @param[in] y Second fixed point operand
- *
- * @return Result fixed point with precision equal to minimum precision of both operands
- */
- template <OverflowPolicy OP = OverflowPolicy::SATURATE, typename T>
- static fixed_point<T> mul(fixed_point<T> x, fixed_point<T> y)
- {
- using promoted_T = typename traits::promote<T>::type;
- uint8_t p_min = std::min(x.precision(), y.precision());
- uint8_t p_max = std::max(x.precision(), y.precision());
- promoted_T round_factor = (1 << (p_max - 1));
- promoted_T val = ((static_cast<promoted_T>(x.raw()) * static_cast<promoted_T>(y.raw())) + round_factor) >> p_max;
- if(OP == OverflowPolicy::SATURATE)
- {
- val = constant_expr<T>::saturate_cast(val);
- }
- return fixed_point<T>(static_cast<T>(val), p_min, true);
- }
- /** Perform division among two fixed point numbers
- *
- * @param[in] x First fixed point operand
- * @param[in] y Second fixed point operand
- *
- * @return Result fixed point with precision equal to minimum precision of both operands
- */
- template <OverflowPolicy OP = OverflowPolicy::SATURATE, typename T>
- static fixed_point<T> div(fixed_point<T> x, fixed_point<T> y)
- {
- using promoted_T = typename traits::promote<T>::type;
- uint8_t p = std::min(x.precision(), y.precision());
- promoted_T denom = static_cast<promoted_T>(y.raw());
- if(denom != 0)
- {
- promoted_T val = (static_cast<promoted_T>(x.raw()) << std::max(x.precision(), y.precision())) / denom;
- if(OP == OverflowPolicy::SATURATE)
- {
- val = constant_expr<T>::saturate_cast(val);
- }
- return fixed_point<T>(static_cast<T>(val), p, true);
- }
- else
- {
- T val = (x.raw() < 0) ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
- return fixed_point<T>(val, p, true);
- }
- }
- /** Shift left
- *
- * @param[in] x Fixed point operand
- * @param[in] shift Shift value
- *
- * @return Shifted value
- */
- template <OverflowPolicy OP = OverflowPolicy::SATURATE, typename T>
- static fixed_point<T> shift_left(fixed_point<T> x, size_t shift)
- {
- using promoted_T = typename traits::promote<T>::type;
- promoted_T val = static_cast<promoted_T>(x.raw()) << shift;
- if(OP == OverflowPolicy::SATURATE)
- {
- val = constant_expr<T>::saturate_cast(val);
- }
- return fixed_point<T>(static_cast<T>(val), x.precision(), true);
- }
- /** Shift right
- *
- * @param[in] x Fixed point operand
- * @param[in] shift Shift value
- *
- * @return Shifted value
- */
- template <typename T>
- static fixed_point<T> shift_right(fixed_point<T> x, size_t shift)
- {
- return fixed_point<T>(x.raw() >> shift, x.precision(), true);
- }
- /** Calculate absolute value
- *
- * @param[in] x Fixed point operand
- *
- * @return Absolute value of operand
- */
- template <typename T>
- static fixed_point<T> abs(fixed_point<T> x)
- {
- using promoted_T = typename traits::promote<T>::type;
- T val = (x.raw() < 0) ? constant_expr<T>::saturate_cast(-static_cast<promoted_T>(x.raw())) : x.raw();
- return fixed_point<T>(val, x.precision(), true);
- }
- /** Calculate the logarithm of a fixed point number
- *
- * @param[in] x Fixed point operand
- *
- * @return Logarithm value of operand
- */
- template <typename T>
- static fixed_point<T> log(fixed_point<T> x)
- {
- uint8_t p = x.precision();
- auto const_one = fixed_point<T>(static_cast<T>(1), p);
-
- // Logarithm of 1 is zero and logarithm of negative values is not defined in R, so return 0.
- // Also, log(x) == -log(1/x) for 0 < x < 1.
- if(isequal(x, const_one) || islessequal(x, fixed_point<T>(static_cast<T>(0), p)))
- {
- return fixed_point<T>(static_cast<T>(0), p, true);
- }
- else if(isless(x, const_one))
- {
- return mul(log(div(const_one, x)), fixed_point<T>(-1, p));
- }
-
- // Remove even powers of 2
- T shift_val = 31 - __builtin_clz(x.raw() >> p);
- x = shift_right(x, shift_val);
- x = sub(x, const_one);
-
- // Constants
- auto ln2 = fixed_point<T>(0.6931471, p);
- auto A = fixed_point<T>(1.4384189, p);
- auto B = fixed_point<T>(-0.67719, p);
- auto C = fixed_point<T>(0.3218538, p);
- auto D = fixed_point<T>(-0.0832229, p);
-
- // Polynomial expansion
- auto sum = add(mul(x, D), C);
- sum = add(mul(x, sum), B);
- sum = add(mul(x, sum), A);
- sum = mul(x, sum);
-
- return mul(add(sum, fixed_point<T>(static_cast<T>(shift_val), p)), ln2);
- }
- /** Calculate the exponential of a fixed point number.
- *
- * exp(x) = exp(floor(x)) * exp(x - floor(x))
- * = pow(2, floor(x) / ln(2)) * exp(x - floor(x))
- * = exp(x - floor(x)) << (floor(x) / ln(2))
- *
- * @param[in] x Fixed point operand
- *
- * @return Exponential value of operand
- */
- template <typename T>
- static fixed_point<T> exp(fixed_point<T> x)
- {
- uint8_t p = x.precision();
- // Constants
- auto const_one = fixed_point<T>(1, p);
- auto ln2 = fixed_point<T>(0.6931471, p);
- auto inv_ln2 = fixed_point<T>(1.442695, p);
- auto A = fixed_point<T>(0.9978546, p);
- auto B = fixed_point<T>(0.4994721, p);
- auto C = fixed_point<T>(0.1763723, p);
- auto D = fixed_point<T>(0.0435108, p);
-
- T scaled_int_part = detail::constant_expr<T>::to_int(mul(x, inv_ln2).raw(), p);
-
- // Polynomial expansion
- auto frac_part = sub(x, mul(ln2, fixed_point<T>(scaled_int_part, p)));
- auto taylor = add(mul(frac_part, D), C);
- taylor = add(mul(frac_part, taylor), B);
- taylor = add(mul(frac_part, taylor), A);
- taylor = mul(frac_part, taylor);
- taylor = add(taylor, const_one);
-
- // Saturate value
- if(static_cast<T>(clz(taylor.raw())) <= scaled_int_part)
- {
- return fixed_point<T>(std::numeric_limits<T>::max(), p, true);
- }
-
- return (scaled_int_part < 0) ? shift_right(taylor, -scaled_int_part) : shift_left(taylor, scaled_int_part);
- }
- /** Calculate the inverse square root of a fixed point number
- *
- * @param[in] x Fixed point operand
- *
- * @return Inverse square root value of operand
- */
- template <typename T>
- static fixed_point<T> inv_sqrt(fixed_point<T> x)
- {
- const uint8_t p = x.precision();
- int8_t shift = std::numeric_limits<T>::digits - (p + detail::clz(x.raw()));
-
- shift += std::numeric_limits<T>::is_signed ? 1 : 0;
-
- // Use volatile to restrict compiler optimizations on shift as compiler reports maybe-uninitialized error on Android
- volatile int8_t *shift_ptr = &shift;
-
- auto const_three = fixed_point<T>(3, p);
- auto a = (*shift_ptr < 0) ? shift_left(x, -(shift)) : shift_right(x, shift);
- fixed_point<T> x2 = a;
-
- // We need three iterations to find the result for QS8 and five for QS16
- constexpr int num_iterations = std::is_same<T, int8_t>::value ? 3 : 5;
- for(int i = 0; i < num_iterations; ++i)
- {
- fixed_point<T> three_minus_dx = sub(const_three, mul(a, mul(x2, x2)));
- x2 = shift_right(mul(x2, three_minus_dx), 1);
- }
-
- return (shift < 0) ? shift_left(x2, (-shift) >> 1) : shift_right(x2, shift >> 1);
- }
- /** Calculate the hyperbolic tangent of a fixed point number
- *
- * @param[in] x Fixed point operand
- *
- * @return Hyperbolic tangent of the operand
- */
- template <typename T>
- static fixed_point<T> tanh(fixed_point<T> x)
- {
- uint8_t p = x.precision();
- // Constants
- auto const_one = fixed_point<T>(1, p);
- auto const_two = fixed_point<T>(2, p);
-
- auto exp2x = exp(const_two * x);
- auto num = exp2x - const_one;
- auto den = exp2x + const_one;
- auto tanh = num / den;
-
- return tanh;
- }
- /** Calculate the a-th power of a fixed point number.
- *
- * The power is computed as x^a = e^(log(x) * a)
- *
- * @param[in] x Fixed point operand
- * @param[in] a Fixed point exponent
- *
- * @return a-th power of the operand
- */
- template <typename T>
- static fixed_point<T> pow(fixed_point<T> x, fixed_point<T> a)
- {
- return exp(log(x) * a);
- }
-};
-
-template <typename T>
-bool operator==(const fixed_point<T> &lhs, const fixed_point<T> &rhs)
-{
- return functions::isequal(lhs, rhs);
-}
-template <typename T>
-bool operator!=(const fixed_point<T> &lhs, const fixed_point<T> &rhs)
-{
- return !operator==(lhs, rhs);
-}
-template <typename T>
-bool operator<(const fixed_point<T> &lhs, const fixed_point<T> &rhs)
-{
- return functions::isless(lhs, rhs);
-}
-template <typename T>
-bool operator>(const fixed_point<T> &lhs, const fixed_point<T> &rhs)
-{
- return operator<(rhs, lhs);
-}
-template <typename T>
-bool operator<=(const fixed_point<T> &lhs, const fixed_point<T> &rhs)
-{
- return !operator>(lhs, rhs);
-}
-template <typename T>
-bool operator>=(const fixed_point<T> &lhs, const fixed_point<T> &rhs)
-{
- return !operator<(lhs, rhs);
-}
-template <typename T>
-fixed_point<T> operator+(const fixed_point<T> &lhs, const fixed_point<T> &rhs)
-{
- return functions::add(lhs, rhs);
-}
-template <typename T>
-fixed_point<T> operator-(const fixed_point<T> &lhs, const fixed_point<T> &rhs)
-{
- return functions::sub(lhs, rhs);
-}
-template <typename T>
-fixed_point<T> operator-(const fixed_point<T> &rhs)
-{
- return functions::negate(rhs);
-}
-template <typename T>
-fixed_point<T> operator*(fixed_point<T> x, fixed_point<T> y)
-{
- return functions::mul(x, y);
-}
-template <typename T>
-fixed_point<T> operator/(fixed_point<T> x, fixed_point<T> y)
-{
- return functions::div(x, y);
-}
-template <typename T>
-fixed_point<T> operator>>(fixed_point<T> x, size_t shift)
-{
- return functions::shift_right(x, shift);
-}
-template <typename T>
-fixed_point<T> operator<<(fixed_point<T> x, size_t shift)
-{
- return functions::shift_left(x, shift);
-}
-template <typename T, typename U, typename traits>
-std::basic_ostream<T, traits> &operator<<(std::basic_ostream<T, traits> &s, fixed_point<U> x)
-{
- return functions::write(s, x);
-}
-template <typename T>
-inline fixed_point<T> min(fixed_point<T> x, fixed_point<T> y)
-{
- return x > y ? y : x;
-}
-template <typename T>
-inline fixed_point<T> max(fixed_point<T> x, fixed_point<T> y)
-{
- return x > y ? x : y;
-}
-template <OverflowPolicy OP = OverflowPolicy::SATURATE, typename T>
-inline fixed_point<T> add(fixed_point<T> x, fixed_point<T> y)
-{
- return functions::add<OP>(x, y);
-}
-template <OverflowPolicy OP = OverflowPolicy::SATURATE, typename T>
-inline fixed_point<T> sub(fixed_point<T> x, fixed_point<T> y)
-{
- return functions::sub<OP>(x, y);
-}
-template <OverflowPolicy OP = OverflowPolicy::SATURATE, typename T>
-inline fixed_point<T> mul(fixed_point<T> x, fixed_point<T> y)
-{
- return functions::mul<OP>(x, y);
-}
-template <typename T>
-inline fixed_point<T> div(fixed_point<T> x, fixed_point<T> y)
-{
- return functions::div(x, y);
-}
-template <typename T>
-inline fixed_point<T> abs(fixed_point<T> x)
-{
- return functions::abs(x);
-}
-template <typename T>
-inline fixed_point<T> clamp(fixed_point<T> x, T min, T max)
-{
- return functions::clamp(x, min, max);
-}
-template <typename T>
-inline fixed_point<T> exp(fixed_point<T> x)
-{
- return functions::exp(x);
-}
-template <typename T>
-inline fixed_point<T> log(fixed_point<T> x)
-{
- return functions::log(x);
-}
-template <typename T>
-inline fixed_point<T> inv_sqrt(fixed_point<T> x)
-{
- return functions::inv_sqrt(x);
-}
-template <typename T>
-inline fixed_point<T> tanh(fixed_point<T> x)
-{
- return functions::tanh(x);
-}
-template <typename T>
-inline fixed_point<T> pow(fixed_point<T> x, fixed_point<T> a)
-{
- return functions::pow(x, a);
-}
-} // namespace detail
-
-// Expose operators
-using detail::operator==;
-using detail::operator!=;
-using detail::operator<;
-using detail::operator>;
-using detail::operator<=;
-using detail::operator>=;
-using detail::operator+;
-using detail::operator-;
-using detail::operator*;
-using detail::operator/;
-using detail::operator>>;
-using detail::operator<<;
-
-// Expose additional functions
-using detail::min;
-using detail::max;
-using detail::add;
-using detail::sub;
-using detail::mul;
-using detail::div;
-using detail::abs;
-using detail::clamp;
-using detail::exp;
-using detail::log;
-using detail::inv_sqrt;
-using detail::tanh;
-using detail::pow;
-// TODO: floor
-// TODO: ceil
-// TODO: sqrt
-} // namespace fixed_point_arithmetic
-} // namespace test
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_TEST_VALIDATION_FIXEDPOINT_H__ */
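For context on the file deleted above: it was the validation-side reference for QS8/QS16 fixed-point arithmetic, where a raw integer v at fixed-point position p represents the value v / 2^p. A minimal standalone sketch of the core conversions it modelled, assuming nothing from the library (helper names are illustrative):

    #include <algorithm>
    #include <cstdint>
    #include <limits>

    // Float -> raw Q-format value at position p: scale by 2^p, round to nearest,
    // saturate to the range of T (int8_t plays the role of QS8, int16_t of QS16).
    template <typename T>
    T to_fixed(float val, uint8_t p)
    {
        const float scaled = val * static_cast<float>(1 << p) + (val >= 0.f ? 0.5f : -0.5f);
        const float lo     = static_cast<float>(std::numeric_limits<T>::min());
        const float hi     = static_cast<float>(std::numeric_limits<T>::max());
        return static_cast<T>(std::min(std::max(scaled, lo), hi));
    }

    // Raw Q-format value -> float: divide by 2^p.
    template <typename T>
    float to_float(T raw, uint8_t p)
    {
        return static_cast<float>(raw) / static_cast<float>(1 << p);
    }

    // Example: with p = 5, to_fixed<int8_t>(0.25f, 5) yields 8 and to_float<int8_t>(8, 5) yields 0.25f.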
diff --git a/tests/validation/GLES_COMPUTE/ActivationLayer.cpp b/tests/validation/GLES_COMPUTE/ActivationLayer.cpp
index a8c7253b8f..7676b858f6 100644
--- a/tests/validation/GLES_COMPUTE/ActivationLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/ActivationLayer.cpp
@@ -61,35 +61,14 @@ AbsoluteTolerance<float> tolerance(ActivationLayerInfo::ActivationFunction activ
case ActivationLayerInfo::ActivationFunction::SQUARE:
return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.1f : epsilon);
case ActivationLayerInfo::ActivationFunction::LOGISTIC:
- if(is_data_type_fixed_point(data_type))
- {
- return AbsoluteTolerance<float>(5.f);
- }
- else
- {
- return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : epsilon);
- }
+ return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : epsilon);
case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.00001f : epsilon);
case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
case ActivationLayerInfo::ActivationFunction::SQRT:
- if(is_data_type_fixed_point(data_type))
- {
- return AbsoluteTolerance<float>(5.f);
- }
- else
- {
- return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.01f : 0.00001f);
- }
+ return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.01f : 0.00001f);
case ActivationLayerInfo::ActivationFunction::TANH:
- if(is_data_type_fixed_point(data_type))
- {
- return AbsoluteTolerance<float>(5.f);
- }
- else
- {
- return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : 0.00001f);
- }
+ return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : 0.00001f);
default:
return AbsoluteTolerance<float>(epsilon);
}
diff --git a/tests/validation/GLES_COMPUTE/PoolingLayer.cpp b/tests/validation/GLES_COMPUTE/PoolingLayer.cpp
index ac1bd724ac..7679007a82 100644
--- a/tests/validation/GLES_COMPUTE/PoolingLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/PoolingLayer.cpp
@@ -59,17 +59,17 @@ TEST_SUITE(PoolingLayer)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("InputInfo",
{
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Window shrink
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid pad/size combination
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid pad/size combination
- TensorInfo(TensorShape(15U, 13U, 5U), 1, DataType::F32, 0), // Non-rectangular Global Pooling
- TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0), // Invalid output Global Pooling
- TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid pad/size combination
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid pad/size combination
+ TensorInfo(TensorShape(15U, 13U, 5U), 1, DataType::F32), // Non-rectangular Global Pooling
+ TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32), // Invalid output Global Pooling
+ TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32),
}),
framework::dataset::make("OutputInfo",
{
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0), TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32, 0), TensorInfo(TensorShape(25U, 16U, 2U), 1, DataType::F32, 0), TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32, 0), TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32, 0), TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32, 0),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16), TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(25U, 16U, 2U), 1, DataType::F32), TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32), TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32), TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32),
})),
framework::dataset::make("PoolInfo",
{
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
index 2b4d277e92..814d1f5ed0 100644
--- a/tests/validation/Helpers.h
+++ b/tests/validation/Helpers.h
@@ -177,12 +177,10 @@ void fill_lookuptable(T &&table)
/** Helper function to get the testing range for batch normalization layer.
*
- * @param[in] fixed_point_position (Optional) Number of bits for the fractional part. Defaults to 0.
- *
 * @return A pair containing the lower and upper testing bounds.
*/
template <typename T>
-std::pair<T, T> get_batchnormalization_layer_test_bounds(int fixed_point_position = 0)
+std::pair<T, T> get_batchnormalization_layer_test_bounds()
{
const bool is_float = std::is_floating_point<T>::value;
std::pair<T, T> bounds;
@@ -194,7 +192,7 @@ std::pair<T, T> get_batchnormalization_layer_test_bounds(int fixed_point_positio
}
else
{
- bounds = std::make_pair(1, 1 << (fixed_point_position));
+ bounds = std::make_pair(1, 1);
}
return bounds;
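A minimal usage sketch of the simplified helper above (the caller no longer passes a fixed-point position; the float instantiation below is illustrative):

    const std::pair<float, float> bounds = get_batchnormalization_layer_test_bounds<float>();
    // bounds.first and bounds.second delimit the range used to fill the test tensors.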
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 591d1424c8..1d82ff0712 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -75,20 +75,20 @@ TEST_SUITE(NEON)
TEST_SUITE(ConvolutionLayer)
DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(23U, 27U, 32U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32, 0)
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32),
+ TensorInfo(TensorShape(23U, 27U, 32U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(5U, 5U, 32U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16, 0)
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 5U, 32U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(19U, 23U, 21U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32, 0)
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(19U, 23U, 21U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
})),
framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 87d413f202..277953badb 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -100,33 +100,33 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, (combine(datasets::Sm
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid weights shape
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16, 4), // Non supported data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 11), // Invalid bias shape
- TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32, 0), // Window shrink
- TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32, 0),
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights shape
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), // Non supported data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid bias shape
+ TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32), // Window shrink
+ TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16, 5),
- TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32, 11),
- TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32, 0),
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32),
})),
- framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(1U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(1U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(1U), 1, DataType::F32, 5),
- TensorInfo(TensorShape(25U, 11U), 1, DataType::F32, 11),
- TensorInfo(TensorShape(1U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+ framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(1U), 1, DataType::F16),
+ TensorInfo(TensorShape(1U), 1, DataType::F32),
+ TensorInfo(TensorShape(1U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(1U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 5),
- TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32, 0),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32),
})),
framework::dataset::make("PadStrideInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
diff --git a/tests/validation/NEON/DilatedConvolutionLayer.cpp b/tests/validation/NEON/DilatedConvolutionLayer.cpp
index 7cfffc0c2b..25b357ebed 100644
--- a/tests/validation/NEON/DilatedConvolutionLayer.cpp
+++ b/tests/validation/NEON/DilatedConvolutionLayer.cpp
@@ -64,20 +64,20 @@ TEST_SUITE(NEON)
TEST_SUITE(DilatedConvolutionLayer)
DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(8U, 8U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32, 0)
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(8U, 8U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16, 0)
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(6U, 6U, 1U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32, 0)
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(6U, 6U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
})),
framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp
index bf5b33c9a2..acd0e5d64b 100644
--- a/tests/validation/NEON/DirectConvolutionLayer.cpp
+++ b/tests/validation/NEON/DirectConvolutionLayer.cpp
@@ -80,45 +80,45 @@ TEST_SUITE(DirectConvolutionLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type input/weights
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching input feature maps
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Unsupported kernel width
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Non-rectangular weights dimensions
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid weights dimensions
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid stride
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid biases size
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid biases dimensions
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid output size
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching input feature maps
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported kernel width
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Non-rectangular weights dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid stride
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size
}),
- framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(9U, 9U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U, 3U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+ framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(9U, 9U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U, 3U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
})),
- framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+ framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(26U, 11U, 4U), 1, DataType::F32, 0),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(26U, 11U, 4U), 1, DataType::F32),
})),
framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp
index eb350e1029..9eba3c85c1 100644
--- a/tests/validation/NEON/GEMMLowp.cpp
+++ b/tests/validation/NEON/GEMMLowp.cpp
@@ -102,7 +102,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::c
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4
- TensorInfo(TensorShape(21U, 13U), 1, DataType::S32, 2), // Mismatching data type
+ TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Mismatching data type
TensorInfo(TensorShape(20U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)),
diff --git a/tests/validation/NEON/LocallyConnected.cpp b/tests/validation/NEON/LocallyConnected.cpp
index 0c36ff6c85..bd0999df50 100644
--- a/tests/validation/NEON/LocallyConnected.cpp
+++ b/tests/validation/NEON/LocallyConnected.cpp
@@ -51,41 +51,41 @@ TEST_SUITE(LocallyConnected)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching data type input/weights
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching data type input/bias
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching data type input/output
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching shape input/weights
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching shape input/bias
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Mismatching shape input/output
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0), // Asymmetric padding
- TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0)
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching data type input/weights
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching data type input/bias
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching data type input/output
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching shape input/weights
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching shape input/bias
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching shape input/output
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Asymmetric padding
+ TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32)
}),
- framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 274U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32, 0)
+ framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 274U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U, 275U), 1, DataType::F32)
})),
- framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 275U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 274U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 275U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(21U, 275U), 1, DataType::F32, 0)
+ framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 275U), 1, DataType::F16),
+ TensorInfo(TensorShape(21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 274U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 275U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 275U), 1, DataType::F32)
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 22U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0)
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F16),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 22U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32)
})),
framework::dataset::make("PadStride", { PadStrideInfo(2, 1, 0, 0),
PadStrideInfo(2, 1, 0, 0),
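Every validate dataset touched above, and in the test files that follow, changes in the same mechanical way: the trailing fixed_point_position argument (always 0 here) is dropped, leaving the three-argument TensorInfo constructor. A minimal before/after sketch, using the same headers the tests already pull in:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    // Before this patch: shape, number of channels, data type, fixed point position.
    // TensorInfo info_old(TensorShape(23U, 27U, 5U), 1, DataType::F32, 0);

    // After this patch: the fixed point position is gone from the constructor.
    TensorInfo info(TensorShape(23U, 27U, 5U), 1, DataType::F32);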
diff --git a/tests/validation/NEON/NormalizationLayer.cpp b/tests/validation/NEON/NormalizationLayer.cpp
index 02cca0b452..a4321000f5 100644
--- a/tests/validation/NEON/NormalizationLayer.cpp
+++ b/tests/validation/NEON/NormalizationLayer.cpp
@@ -66,19 +66,19 @@ TEST_SUITE(NormalizationLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type input/output
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching shapes
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Even normalization
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Non implemented IN_MAP_2D
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Window shrink
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 0),
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/output
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Even normalization
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Non implemented IN_MAP_2D
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
}),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 0),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
})),
framework::dataset::make("NormInfo", { NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
index bbfca46ca9..336c066fa9 100644
--- a/tests/validation/NEON/PoolingLayer.cpp
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -71,21 +71,21 @@ TEST_SUITE(PoolingLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Window shrink
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid pad/size combination
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid pad/size combination
- TensorInfo(TensorShape(15U, 13U, 5U), 1, DataType::F32, 0), // Non-rectangular Global Pooling
- TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0), // Invalid output Global Pooling
- TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0),
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid pad/size combination
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid pad/size combination
+ TensorInfo(TensorShape(15U, 13U, 5U), 1, DataType::F32), // Non-rectangular Global Pooling
+ TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32), // Invalid output Global Pooling
+ TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32),
}),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0),
- TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 16U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
})),
framework::dataset::make("PoolInfo", { PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
diff --git a/tests/validation/NEON/RNNLayer.cpp b/tests/validation/NEON/RNNLayer.cpp
index 7aa3befd03..a5f84990f2 100644
--- a/tests/validation/NEON/RNNLayer.cpp
+++ b/tests/validation/NEON/RNNLayer.cpp
@@ -49,59 +49,59 @@ TEST_SUITE(RNNLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::U8, 0), // Wrong data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Wrong input size
- TensorInfo(TensorShape(27U, 13U), 1, DataType::F32, 0), // Wrong weights size
- TensorInfo(TensorShape(27U, 13U), 1, DataType::F32, 0), // Wrong recurrent weights size
- TensorInfo(TensorShape(27U, 13U), 1, DataType::F32, 0), // Wrong bias size
- TensorInfo(TensorShape(27U, 13U), 1, DataType::F32, 0), // Wrong output size
- TensorInfo(TensorShape(27U, 13U), 1, DataType::F32, 0), // Wrong hidden output size
- TensorInfo(TensorShape(32U, 32U), 1, DataType::F32, 0),
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::U8), // Wrong data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Wrong input size
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Wrong weights size
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Wrong recurrent weights size
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Wrong bias size
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Wrong output size
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Wrong hidden output size
+ TensorInfo(TensorShape(32U, 32U), 1, DataType::F32),
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(27U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(27U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 32U), 1, DataType::F32, 0),
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 32U), 1, DataType::F32),
})),
- framework::dataset::make("RecurrentWeightsInfo", { TensorInfo(TensorShape(11U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 32U), 1, DataType::F32, 0),
+ framework::dataset::make("RecurrentWeightsInfo", { TensorInfo(TensorShape(11U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 32U), 1, DataType::F32),
})),
- framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(30U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U), 1, DataType::F32, 0),
+ framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 32U), 1, DataType::F32, 0),
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 32U), 1, DataType::F32),
})),
- framework::dataset::make("HiddenStateInfo", { TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(11U, 13U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(32U, 32U), 1, DataType::F32, 0),
+ framework::dataset::make("HiddenStateInfo", { TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 32U), 1, DataType::F32),
})),
framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp
index 5f5cfdd808..0d4a86e372 100644
--- a/tests/validation/NEON/Scale.cpp
+++ b/tests/validation/NEON/Scale.cpp
@@ -77,17 +77,17 @@ TEST_SUITE(Scale)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8, 0), // Mismatching data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Unsupported sampling point
- TensorInfo(TensorShape(4U, 27U, 13U), 1, DataType::F32, 0), // Invalid policy
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Insufficient padding
- TensorInfo(TensorShape(4U, 27U, 13U), 1, DataType::F32, 0),
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Mismatching data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported sampling point
+ TensorInfo(TensorShape(4U, 27U, 13U), 1, DataType::F32), // Invalid policy
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Insufficient padding
+ TensorInfo(TensorShape(4U, 27U, 13U), 1, DataType::F32),
}),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(132U, 25U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(132U, 25U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U, 132U, 25U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(132U, 25U, 2U), 1, DataType::F32, 0),
- TensorInfo(TensorShape(4U, 132U, 25U), 1, DataType::F32, 0),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(132U, 25U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(132U, 25U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U, 132U, 25U), 1, DataType::F32),
+ TensorInfo(TensorShape(132U, 25U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U, 132U, 25U), 1, DataType::F32),
})),
framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR,
InterpolationPolicy::NEAREST_NEIGHBOR,
diff --git a/tests/validation/Validation.h b/tests/validation/Validation.h
index 0c96052368..9ce597b621 100644
--- a/tests/validation/Validation.h
+++ b/tests/validation/Validation.h
@@ -24,7 +24,6 @@
#ifndef __ARM_COMPUTE_TEST_VALIDATION_H__
#define __ARM_COMPUTE_TEST_VALIDATION_H__
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/IArray.h"
#include "arm_compute/core/Types.h"
#include "support/ToolchainSupport.h"
diff --git a/tests/validation/fixtures/PoolingLayerFixture.h b/tests/validation/fixtures/PoolingLayerFixture.h
index 24539545ca..499628c438 100644
--- a/tests/validation/fixtures/PoolingLayerFixture.h
+++ b/tests/validation/fixtures/PoolingLayerFixture.h
@@ -65,16 +65,10 @@ protected:
std::uniform_real_distribution<> distribution(-1.f, 1.f);
library->fill(tensor, distribution, 0);
}
- else if(is_data_type_quantized_asymmetric(tensor.data_type()))
+ else // data type is quantized_asymmetric
{
library->fill_tensor_uniform(tensor, 0);
}
- else
- {
- const int one_fixed = 1;
- std::uniform_int_distribution<> distribution(-one_fixed, one_fixed);
- library->fill(tensor, distribution, 0);
- }
}
TensorType compute_target(TensorShape shape, PoolingLayerInfo info,
diff --git a/tests/validation/fixtures/SoftmaxLayerFixture.h b/tests/validation/fixtures/SoftmaxLayerFixture.h
index 59ce5192ff..99c0710f7f 100644
--- a/tests/validation/fixtures/SoftmaxLayerFixture.h
+++ b/tests/validation/fixtures/SoftmaxLayerFixture.h
@@ -64,17 +64,11 @@ protected:
std::uniform_real_distribution<> distribution(-1000.f, 1000.f);
library->fill(tensor, distribution, 0);
}
- else if(is_data_type_quantized_asymmetric(tensor.data_type()))
+ else // data type is quantized_asymmetric
{
std::uniform_int_distribution<> distribution(0, 100);
library->fill(tensor, distribution, 0);
}
- else
- {
- const int one_fixed = 1;
- std::uniform_int_distribution<> distribution(-one_fixed, one_fixed);
- library->fill(tensor, distribution, 0);
- }
}
TensorType compute_target(const TensorShape &shape, DataType data_type,
@@ -139,20 +133,6 @@ public:
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class SoftmaxValidationFixedPointFixture : public SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type)
- {
- SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape,
- data_type,
- QuantizationInfo(),
- 1.0f);
- }
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class SoftmaxValidationQuantizedFixture : public SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
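With the QS8/QS16 branch deleted, the fixture's tensor fill reduces to two cases: floating point data gets a real-valued distribution, anything else is treated as quantized asymmetric. A sketch of the resulting helper, assuming the guard above the visible hunk tests for floating point types and relying on the fixture's library member (both assumptions, since they sit outside the hunk):

    #include <random>

    // Illustrative reconstruction of the remaining fill logic; the enclosing fixture
    // provides `library`, and the floating point guard is assumed.
    template <typename U>
    void fill(U &&tensor)
    {
        if(is_data_type_float(tensor.data_type()))
        {
            std::uniform_real_distribution<> distribution(-1000.f, 1000.f);
            library->fill(tensor, distribution, 0);
        }
        else // data type is quantized_asymmetric
        {
            std::uniform_int_distribution<> distribution(0, 100);
            library->fill(tensor, distribution, 0);
        }
    }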
diff --git a/tests/validation/reference/ArithmeticSubtraction.cpp b/tests/validation/reference/ArithmeticSubtraction.cpp
index bed2d37090..f39d01f9e8 100644
--- a/tests/validation/reference/ArithmeticSubtraction.cpp
+++ b/tests/validation/reference/ArithmeticSubtraction.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,6 @@
*/
#include "ArithmeticSubtraction.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
namespace arm_compute
diff --git a/tests/validation/reference/BatchNormalizationLayer.cpp b/tests/validation/reference/BatchNormalizationLayer.cpp
index 3d1a6ed7d7..4ea3769c2c 100644
--- a/tests/validation/reference/BatchNormalizationLayer.cpp
+++ b/tests/validation/reference/BatchNormalizationLayer.cpp
@@ -25,7 +25,6 @@
#include "ActivationLayer.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
namespace arm_compute
diff --git a/tests/validation/reference/ChannelCombine.cpp b/tests/validation/reference/ChannelCombine.cpp
index c1ec3ec578..b76dcaca8c 100644
--- a/tests/validation/reference/ChannelCombine.cpp
+++ b/tests/validation/reference/ChannelCombine.cpp
@@ -24,7 +24,6 @@
#include "ChannelCombine.h"
#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
namespace arm_compute
diff --git a/tests/validation/reference/ChannelExtract.cpp b/tests/validation/reference/ChannelExtract.cpp
index 595bb13098..6f17fc06fe 100644
--- a/tests/validation/reference/ChannelExtract.cpp
+++ b/tests/validation/reference/ChannelExtract.cpp
@@ -24,7 +24,6 @@
#include "ChannelExtract.h"
#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
namespace arm_compute
diff --git a/tests/validation/reference/ColorConvert.cpp b/tests/validation/reference/ColorConvert.cpp
index a8a530498e..6aa2ffa14c 100644
--- a/tests/validation/reference/ColorConvert.cpp
+++ b/tests/validation/reference/ColorConvert.cpp
@@ -24,7 +24,6 @@
#include "ColorConvert.h"
#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/ColorConvertHelper.h"
diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h
index 700175880b..2e5fefd99a 100644
--- a/tests/validation/reference/Convolution3d.h
+++ b/tests/validation/reference/Convolution3d.h
@@ -25,7 +25,6 @@
#define __ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H__
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/UtilsQuantizedAsymm.h"
@@ -91,74 +90,16 @@ inline void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weig
*out_ptr = acc + (*b_ptr);
}
-// 3D convolution for fixed point type
-template < typename T, typename TB, typename std::enable_if < std::is_integral<T>::value &&std::is_integral<TB>::value, int >::type = 0 >
+// 3D convolution for QASYMM8 type
+template < typename T, typename TB, typename std::enable_if < std::is_same<T, uint8_t>::value &&std::is_same<TB, int32_t>::value, int >::type = 0 >
inline void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
int i_offset, int w_offset, int b_offset, int o_offset,
int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x = 1, int dilation_y = 1)
{
- const T *in_ptr = in.data() + i_offset;
- const T *w_ptr = weights.data() + w_offset;
- const T *b_ptr = bias.data() + b_offset;
- T *out_ptr = out.data() + o_offset;
- int fixed_point_position = in.fixed_point_position();
-
- const int half_width_weights_start = width_weights / 2;
- const int half_width_weights_end = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
- const int half_height_weights_start = height_weights / 2;
- const int half_height_weights_end = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
-
- using namespace fixed_point_arithmetic;
- using promoted_type = fixed_point_arithmetic::traits::promote_t<T>;
-
- // Reset accumulator
- fixed_point<promoted_type> acc(0, fixed_point_position);
-
- // Compute a 2D convolution for each IFM and accumulate the result
- for(int ifm = 0; ifm < depth_in; ++ifm)
- {
- // Compute the offset for the input slice
- const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
-
- // Compute 2D convolution
- for(int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
- {
- for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
- {
- // Check if the pixel is out-of-bound
- if(is_valid_pixel(xi + xk * dilation_x, 0, width_in) && is_valid_pixel(yi + yk * dilation_y, 0, height_in))
- {
- const int idx = xk + half_width_weights_start;
- const int idy = yk + half_height_weights_start;
-
- const fixed_point<promoted_type> i_value(in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in], fixed_point_position, true);
- const fixed_point<promoted_type> w_value(w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights], fixed_point_position, true);
- const fixed_point<promoted_type> iw = i_value * w_value;
- acc = iw + acc;
- }
- }
- }
- }
-
- // Get the bias
- const fixed_point<promoted_type> b(*b_ptr, fixed_point_position, true);
-
- // Accumulate the bias and covert back
- acc = acc + b;
- fixed_point<T> res(acc);
- *out_ptr = res.raw();
-}
-
-// 3D convolution for QASYMM8 type
-template <>
-inline void convolution3d(const SimpleTensor<uint8_t> &in, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &out,
- int i_offset, int w_offset, int b_offset, int o_offset,
- int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x, int dilation_y)
-{
- const uint8_t *in_ptr = in.data() + i_offset;
- const uint8_t *w_ptr = weights.data() + w_offset;
- const int32_t *b_ptr = bias.data() + b_offset;
- uint8_t *out_ptr = out.data() + o_offset;
+ const T *in_ptr = in.data() + i_offset;
+ const T *w_ptr = weights.data() + w_offset;
+ const TB *b_ptr = bias.data() + b_offset;
+ T *out_ptr = out.data() + o_offset;
const int input_offset = -in.quantization_info().offset;
const float input_scale = in.quantization_info().scale;
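The fixed-point convolution3d overload is deleted and the former QASYMM8 explicit specialisation becomes the template itself, selected through enable_if on the exact uint8_t/int32_t pair, so callers keep compiling unchanged while QS8/QS16 instantiations simply stop existing. A standalone sketch of that selection pattern (names here are illustrative, not the library's):

    #include <cstdint>
    #include <type_traits>

    // Same enable_if dispatch shape as the reference code above: one overload for
    // floating point types, one restricted to uint8_t, nothing left for QS8/QS16.
    template <typename T, typename std::enable_if<std::is_floating_point<T>::value, int>::type = 0>
    const char *conv_kind(T) { return "float path"; }

    template <typename T, typename std::enable_if<std::is_same<T, uint8_t>::value, int>::type = 0>
    const char *conv_kind(T) { return "QASYMM8 path"; }

    int main()
    {
        conv_kind(1.0f);          // picks the floating point overload
        conv_kind(uint8_t{42});   // picks the QASYMM8 overload
        // conv_kind(int16_t{1}); // would no longer compile: no QS16 path remains
    }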
diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp
index 00c839d2df..e212e2742f 100644
--- a/tests/validation/reference/ConvolutionLayer.cpp
+++ b/tests/validation/reference/ConvolutionLayer.cpp
@@ -23,7 +23,6 @@
*/
#include "ConvolutionLayer.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/Convolution3d.h"
#include "tests/validation/reference/Permute.h"
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
index d073bbf7a1..e73023e419 100644
--- a/tests/validation/reference/DeconvolutionLayer.cpp
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -23,7 +23,6 @@
*/
#include "ConvolutionLayer.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
namespace arm_compute
diff --git a/tests/validation/reference/DepthConcatenateLayer.cpp b/tests/validation/reference/DepthConcatenateLayer.cpp
index c9a23520c7..dbcd575e9a 100644
--- a/tests/validation/reference/DepthConcatenateLayer.cpp
+++ b/tests/validation/reference/DepthConcatenateLayer.cpp
@@ -23,7 +23,6 @@
*/
#include "DepthConcatenateLayer.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
namespace arm_compute
diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp
index 022007720a..6f90963360 100644
--- a/tests/validation/reference/DepthConvertLayer.cpp
+++ b/tests/validation/reference/DepthConvertLayer.cpp
@@ -23,7 +23,6 @@
*/
#include "DepthConvertLayer.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
#include "tests/Types.h"
@@ -61,33 +60,6 @@ SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, Con
return result;
}
-template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&std::is_integral<T2>::value &&std::is_same<T1, T2>::value, int >::type >
-SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift)
-{
- ARM_COMPUTE_UNUSED(policy);
-
- using namespace fixed_point_arithmetic;
-
- SimpleTensor<T2> result(src.shape(), dt_out);
-
- bool is_in_place = (&src == &result);
-
- const int fixed_point_position_in = src.fixed_point_position();
- const int fixed_point_position_out = (is_in_place) ? static_cast<int>(shift) : result.fixed_point_position();
-
- if(!is_in_place || (fixed_point_position_in != fixed_point_position_out))
- {
- for(int i = 0; i < src.num_elements(); ++i)
- {
- auto x = fixed_point<T2>(src[i], fixed_point_position_in, true);
- x.resacle(fixed_point_position_out);
- result[i] = x.raw();
- }
- }
-
- return result;
-}
-
template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index d8f3cbae49..39429e2449 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -26,7 +26,6 @@
#include "ConvolutionLayer.h"
#include "Utils.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/Utils.h"
#include "tests/validation/reference/UtilsQuantizedAsymm.h"
diff --git a/tests/validation/reference/FixedPoint.cpp b/tests/validation/reference/FixedPoint.cpp
deleted file mode 100644
index a016093ed6..0000000000
--- a/tests/validation/reference/FixedPoint.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "FixedPoint.h"
-
-#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
-#include "tests/validation/Helpers.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace reference
-{
-template <typename T>
-SimpleTensor<T> fixed_point_operation(const SimpleTensor<T> &src, FixedPointOp op)
-{
- SimpleTensor<T> result(src.shape(), src.data_type());
-
- const int p = src.fixed_point_position();
- switch(op)
- {
- case FixedPointOp::EXP:
- for(int i = 0; i < src.num_elements(); ++i)
- {
- result[i] = fixed_point_arithmetic::exp(fixed_point_arithmetic::fixed_point<T>(src[i], p, true)).raw();
- }
- break;
- case FixedPointOp::LOG:
- for(int i = 0; i < src.num_elements(); ++i)
- {
- result[i] = fixed_point_arithmetic::log(fixed_point_arithmetic::fixed_point<T>(src[i], p, true)).raw();
- }
- break;
- case FixedPointOp::INV_SQRT:
- for(int i = 0; i < src.num_elements(); ++i)
- {
- result[i] = fixed_point_arithmetic::inv_sqrt(fixed_point_arithmetic::fixed_point<T>(src[i], p, true)).raw();
- }
- break;
- case FixedPointOp::RECIPROCAL:
- for(int i = 0; i < src.num_elements(); ++i)
- {
- result[i] = fixed_point_arithmetic::div(fixed_point_arithmetic::fixed_point<T>(1, p), fixed_point_arithmetic::fixed_point<T>(src[i], p, true)).raw();
- }
- break;
- default:
- ARM_COMPUTE_ERROR("Fixed point operation not supported");
- break;
- }
-
- return result;
-}
-
-template SimpleTensor<int8_t> fixed_point_operation(const SimpleTensor<int8_t> &src, FixedPointOp op);
-template SimpleTensor<int16_t> fixed_point_operation(const SimpleTensor<int16_t> &src, FixedPointOp op);
-} // namespace reference
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/reference/FixedPoint.h b/tests/validation/reference/FixedPoint.h
deleted file mode 100644
index f0117f9dd0..0000000000
--- a/tests/validation/reference/FixedPoint.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_TEST_FIXED_POINT_OPERATION_H__
-#define __ARM_COMPUTE_TEST_FIXED_POINT_OPERATION_H__
-
-#include "tests/SimpleTensor.h"
-#include "tests/Types.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace reference
-{
-template <typename T>
-SimpleTensor<T> fixed_point_operation(const SimpleTensor<T> &src, FixedPointOp op);
-} // namespace reference
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_FIXED_POINT_OPERATION_H__ */
diff --git a/tests/validation/reference/FlattenLayer.cpp b/tests/validation/reference/FlattenLayer.cpp
index e140d752a0..381ce37051 100644
--- a/tests/validation/reference/FlattenLayer.cpp
+++ b/tests/validation/reference/FlattenLayer.cpp
@@ -23,8 +23,6 @@
*/
#include "FlattenLayer.h"
-#include "tests/validation/FixedPoint.h"
-
namespace arm_compute
{
namespace test
diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp
index 3ef10eacea..d65d0caab0 100644
--- a/tests/validation/reference/FullyConnectedLayer.cpp
+++ b/tests/validation/reference/FullyConnectedLayer.cpp
@@ -24,7 +24,6 @@
#include "FullyConnectedLayer.h"
#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/reference/UtilsQuantizedAsymm.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
diff --git a/tests/validation/reference/GEMM.cpp b/tests/validation/reference/GEMM.cpp
index 7378ada4ab..2feab89950 100644
--- a/tests/validation/reference/GEMM.cpp
+++ b/tests/validation/reference/GEMM.cpp
@@ -24,7 +24,6 @@
#include "GEMM.h"
#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
namespace arm_compute
{
@@ -85,75 +84,6 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S
return dst;
}
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type>
-SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta)
-{
- using namespace fixed_point_arithmetic;
-
- // Create reference
- SimpleTensor<T> dst{ c.shape(), c.data_type(), 1 };
-
- // Compute reference
- using promoted_type = fixed_point_arithmetic::traits::promote_t<T>;
-
- const int M = dst.shape().y();
- const int N = dst.shape().x();
- const int K = a.shape().x();
- const int D = a.shape().z(); // Number of matrices in a batch
- const int W = a.shape()[3]; // Number of batched-gemm (Winograd case)
-
- const int a_stride_z = K * M;
- const int a_stride_w = K * M * D;
-
- const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions
- const int b_stride_w = b.shape().num_dimensions() > 3 ? K * N * D : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions
-
- const int c_stride_z = N * M;
- const int c_stride_w = N * M * D;
-
- const int fixed_point_position = a.fixed_point_position();
- const fixed_point<T> alpha_q(alpha, fixed_point_position);
- const fixed_point<T> beta_q(beta, fixed_point_position);
-
- for(int w = 0; w < W; ++w)
- {
- for(int depth = 0; depth < D; ++depth)
- {
- const int base_addr_a = depth * a_stride_z + w * a_stride_w;
- const int base_addr_b = depth * b_stride_z + w * b_stride_w;
- const int base_addr_c = depth * c_stride_z + w * c_stride_w;
-
- for(int row = 0; row < M; ++row)
- {
- for(int col = 0; col < N; ++col)
- {
- fixed_point<promoted_type> acc_q(0, fixed_point_position);
-
- for(int k = 0; k < K; ++k)
- {
- const fixed_point<promoted_type> a0_q(a[base_addr_a + row * K + k], fixed_point_position, true);
- const fixed_point<promoted_type> b0_q(b[base_addr_b + k * N + col], fixed_point_position, true);
-
- acc_q = acc_q + (a0_q * b0_q);
- }
-
- // Finalize the result: alpha * A * B + beta * C
- const fixed_point<T> c0_q(c[base_addr_c + col + row * N], fixed_point_position, true);
-
- fixed_point<T> res_q(acc_q);
- res_q = alpha_q * res_q;
- res_q = res_q + (beta_q * c0_q);
-
- // Store the result
- dst[base_addr_c + col + row * N] = res_q.raw();
- }
- }
- }
- }
-
- return dst;
-}
-
template SimpleTensor<float> gemm(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta);
template SimpleTensor<half> gemm(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta);
} // namespace reference
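Only the floating point reference GEMM survives this hunk. The removed loop still documents the intended arithmetic, dst = alpha * A * B + beta * C computed per batch; a minimal single-matrix sketch of that computation in plain float (a simplification of the removed structure, not the library's code) is:

    #include <vector>

    // dst = alpha * (a * b) + beta * c, with a: MxK, b: KxN, c/dst: MxN, row major.
    void gemm_ref(const std::vector<float> &a, const std::vector<float> &b,
                  const std::vector<float> &c, std::vector<float> &dst,
                  int M, int N, int K, float alpha, float beta)
    {
        for(int row = 0; row < M; ++row)
        {
            for(int col = 0; col < N; ++col)
            {
                float acc = 0.f;
                for(int k = 0; k < K; ++k)
                {
                    acc += a[row * K + k] * b[k * N + col];
                }
                dst[row * N + col] = alpha * acc + beta * c[row * N + col];
            }
        }
    }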
diff --git a/tests/validation/reference/GEMM.h b/tests/validation/reference/GEMM.h
index cda792bf8b..39007c60bc 100644
--- a/tests/validation/reference/GEMM.h
+++ b/tests/validation/reference/GEMM.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,8 +38,6 @@ namespace reference
template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta);
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/GEMMInterleave4x4.h b/tests/validation/reference/GEMMInterleave4x4.h
index e6b09afb9a..e3d72d91aa 100644
--- a/tests/validation/reference/GEMMInterleave4x4.h
+++ b/tests/validation/reference/GEMMInterleave4x4.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#include "GEMM.h"
#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
namespace arm_compute
{
diff --git a/tests/validation/reference/GEMMInterleaveBlocked.h b/tests/validation/reference/GEMMInterleaveBlocked.h
index ff5a0d647c..d649a512e3 100644
--- a/tests/validation/reference/GEMMInterleaveBlocked.h
+++ b/tests/validation/reference/GEMMInterleaveBlocked.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#include "GEMM.h"
#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
namespace arm_compute
{
diff --git a/tests/validation/reference/GEMMTranspose1xW.h b/tests/validation/reference/GEMMTranspose1xW.h
index d6a2e89176..6ec70b1067 100644
--- a/tests/validation/reference/GEMMTranspose1xW.h
+++ b/tests/validation/reference/GEMMTranspose1xW.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#include "GEMM.h"
#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
namespace arm_compute
{
diff --git a/tests/validation/reference/NormalizationLayer.cpp b/tests/validation/reference/NormalizationLayer.cpp
index 85872c8f90..2ae68c63cf 100644
--- a/tests/validation/reference/NormalizationLayer.cpp
+++ b/tests/validation/reference/NormalizationLayer.cpp
@@ -24,7 +24,6 @@
#include "NormalizationLayer.h"
#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
namespace arm_compute
{
@@ -146,125 +145,6 @@ SimpleTensor<T> normalization_layer(const SimpleTensor<T> &src, NormalizationLay
return dst;
}
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type>
-SimpleTensor<T> normalization_layer(const SimpleTensor<T> &src, NormalizationLayerInfo info)
-{
- using namespace fixed_point_arithmetic;
-
- // Create reference
- SimpleTensor<T> dst{ src.shape(), src.data_type(), 1 };
-
- // Compute reference
- const int fixed_point_position = src.fixed_point_position();
-
- const uint32_t norm_size = info.norm_size();
- NormType type = info.type();
- fixed_point<T> beta(info.beta(), fixed_point_position);
- fixed_point<T> kappa(info.kappa(), fixed_point_position);
-
- const int cols = src.shape()[0];
- const int rows = src.shape()[1];
- const int depth = src.shape()[2];
- int upper_dims = src.shape().total_size() / (cols * rows);
-
- fixed_point<T> coeff(info.scale_coeff(), fixed_point_position);
- int radius_cols = norm_size / 2;
-
- // IN_MAP_1D and CROSS_MAP normalize over a single axis only
- int radius_rows = (NormType::IN_MAP_2D == type) ? norm_size / 2 : 0;
-
- if(type == NormType::CROSS_MAP)
- {
- // Remove also depth from upper dimensions since it is the dimension we
- // want to use for normalization
- upper_dims /= depth;
-
- for(int r = 0; r < upper_dims; ++r)
- {
- for(int i = 0; i < rows; ++i)
- {
- for(int k = 0; k < cols; ++k)
- {
- for(int l = 0; l < depth; ++l)
- {
- fixed_point<T> accumulated_scale(0.f, fixed_point_position);
-
- for(int j = -radius_cols; j <= radius_cols; ++j)
- {
- const int z = l + j;
-
- if(z >= 0 && z < depth)
- {
- const T value = src[k + i * cols + z * rows * cols + r * cols * rows * depth];
- const fixed_point<T> fp_value(value, fixed_point_position, true);
- accumulated_scale = add(accumulated_scale, mul(fp_value, fp_value));
- }
- }
-
- accumulated_scale = add(kappa, mul(accumulated_scale, coeff));
- dst[k + i * cols + l * rows * cols + r * cols * rows * depth] = accumulated_scale.raw();
- }
- }
- }
- }
- }
- else
- {
- for(int r = 0; r < upper_dims; ++r)
- {
- for(int i = 0; i < rows; ++i)
- {
- for(int k = 0; k < cols; ++k)
- {
- fixed_point<T> accumulated_scale(0.f, fixed_point_position);
-
- for(int j = -radius_rows; j <= radius_rows; ++j)
- {
- const int y = i + j;
-
- for(int l = -radius_cols; l <= radius_cols; ++l)
- {
- const int x = k + l;
-
- if((x >= 0 && y >= 0) && (x < cols && y < rows))
- {
- const T value = src[x + y * cols + r * cols * rows];
- const fixed_point<T> fp_value(value, fixed_point_position, true);
- accumulated_scale = add(accumulated_scale, mul(fp_value, fp_value));
- }
- }
- }
-
- accumulated_scale = add(kappa, mul(accumulated_scale, coeff));
- dst[k + i * cols + r * cols * rows] = accumulated_scale.raw();
- }
- }
- }
- }
-
- if(info.beta() == 1.f)
- {
- for(int i = 0; i < dst.num_elements(); ++i)
- {
- fixed_point<T> res = div(fixed_point<T>(src[i], fixed_point_position, true), fixed_point<T>(dst[i], fixed_point_position, true));
- dst[i] = res.raw();
- }
- }
- else
- {
- const fixed_point<T> beta(info.beta(), fixed_point_position);
-
- for(int i = 0; i < dst.num_elements(); ++i)
- {
- fixed_point<T> res = pow(fixed_point<T>(dst[i], fixed_point_position, true), beta);
- res = div(fixed_point<T>(src[i], fixed_point_position, true), res);
- dst[i] = res.raw();
- }
- }
-
- return dst;
-}
-
template SimpleTensor<float> normalization_layer(const SimpleTensor<float> &src, NormalizationLayerInfo info);
template SimpleTensor<half> normalization_layer(const SimpleTensor<half> &src, NormalizationLayerInfo info);
} // namespace reference
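The removed block implemented, in fixed point, the same normalization the floating point template keeps: scale = kappa + coeff * sum of squares over the window, then dst = src / scale^beta, with beta == 1 short-circuited to a plain division. A scalar sketch of the cross-map case for a single element (illustrative only, floats instead of fixed point):

    #include <cmath>

    // channel_values holds the `depth` values along the normalized axis; l is the
    // element being normalized and radius is norm_size / 2.
    float normalize_one(const float *channel_values, int depth, int l, int radius,
                        float kappa, float coeff, float beta)
    {
        float scale = kappa;
        for(int j = -radius; j <= radius; ++j)
        {
            const int z = l + j;
            if(z >= 0 && z < depth)
            {
                scale += coeff * channel_values[z] * channel_values[z];
            }
        }
        return (beta == 1.f) ? channel_values[l] / scale
                             : channel_values[l] / std::pow(scale, beta);
    }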
diff --git a/tests/validation/reference/NormalizationLayer.h b/tests/validation/reference/NormalizationLayer.h
index 3f624ff30a..3448baf385 100644
--- a/tests/validation/reference/NormalizationLayer.h
+++ b/tests/validation/reference/NormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,8 +38,6 @@ namespace reference
template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
SimpleTensor<T> normalization_layer(const SimpleTensor<T> &src, NormalizationLayerInfo info);
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-SimpleTensor<T> normalization_layer(const SimpleTensor<T> &src, NormalizationLayerInfo info);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/PixelWiseMultiplication.cpp b/tests/validation/reference/PixelWiseMultiplication.cpp
index 7304fb0673..859da5ce59 100644
--- a/tests/validation/reference/PixelWiseMultiplication.cpp
+++ b/tests/validation/reference/PixelWiseMultiplication.cpp
@@ -23,8 +23,6 @@
*/
#include "PixelWiseMultiplication.h"
-#include "tests/validation/FixedPoint.h"
-
namespace arm_compute
{
namespace test
diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp
index e9054b9043..02c430a64f 100644
--- a/tests/validation/reference/PoolingLayer.cpp
+++ b/tests/validation/reference/PoolingLayer.cpp
@@ -25,7 +25,6 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
namespace arm_compute
@@ -44,7 +43,7 @@ SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo
ARM_COMPUTE_ERROR_ON(info.is_global_pooling() && (src.shape().x() != src.shape().y()));
// Create reference
- SimpleTensor<T> dst{ compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type(), src.fixed_point_position()), info), src.data_type(), 1 };
+ SimpleTensor<T> dst{ compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info), src.data_type(), 1 };
const int pool_size_x = info.is_global_pooling() ? src.shape().x() : info.pool_size().width;
const int pool_size_y = info.is_global_pooling() ? src.shape().y() : info.pool_size().height;
diff --git a/tests/validation/reference/SoftmaxLayer.cpp b/tests/validation/reference/SoftmaxLayer.cpp
index ae4bcd8f0e..aa640ad5e6 100644
--- a/tests/validation/reference/SoftmaxLayer.cpp
+++ b/tests/validation/reference/SoftmaxLayer.cpp
@@ -24,7 +24,6 @@
#include "SoftmaxLayer.h"
#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
namespace arm_compute
{
@@ -71,63 +70,21 @@ SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta)
return dst;
}
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type>
+template <typename T, typename std::enable_if<std::is_same<T, uint8_t>::value, int>::type>
SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta)
{
- ARM_COMPUTE_UNUSED(beta);
-
- using namespace fixed_point_arithmetic;
-
- // Create reference
- SimpleTensor<T> dst{ src.shape(), src.data_type(), 1 };
-
- // Compute reference
- const int cols = src.shape()[0];
- const int upper_dims = src.num_elements() / cols;
-
- for(int r = 0; r < upper_dims; ++r)
- {
- const T *src_row_ptr = src.data() + r * cols;
- T *dst_row_ptr = dst.data() + r * cols;
-
- // Find max
- const fixed_point<T> max(*std::max_element(src_row_ptr, src_row_ptr + cols), src.fixed_point_position(), true);
-
- // Regularize
- using promoted_type = fixed_point_arithmetic::traits::promote_t<T>;
- fixed_point<promoted_type> sum(0, src.fixed_point_position(), true);
- std::transform(src_row_ptr, src_row_ptr + cols, dst_row_ptr, [&](T val)
- {
- const fixed_point<T> res = exp(fixed_point<T>(val, src.fixed_point_position(), true) - max);
- sum = add(sum, fixed_point<promoted_type>(res.raw(), src.fixed_point_position(), true));
- return res.raw();
- });
-
- // Normalize
- fixed_point<T> saturated_sum(sum);
- std::transform(dst_row_ptr, dst_row_ptr + cols, dst_row_ptr, [&](T val)
- {
- return div(fixed_point<T>(val, src.fixed_point_position(), true), saturated_sum).raw();
- });
- }
-
- return dst;
-}
-
-template <>
-SimpleTensor<uint8_t> softmax_layer<uint8_t>(const SimpleTensor<uint8_t> &src, float beta)
-{
// Note: Output quantization info should always have scale = 1/256 and offset = 0
const QuantizationInfo output_quantization_info = QuantizationInfo(1.f / 256, 0);
- SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
- SimpleTensor<float> dst_tmp = softmax_layer<float>(src_tmp, beta);
- SimpleTensor<uint8_t> dst = convert_to_asymmetric(dst_tmp, output_quantization_info);
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp = softmax_layer<float>(src_tmp, beta);
+ SimpleTensor<T> dst = convert_to_asymmetric(dst_tmp, output_quantization_info);
return dst;
}
template SimpleTensor<float> softmax_layer(const SimpleTensor<float> &src, float beta);
template SimpleTensor<half> softmax_layer(const SimpleTensor<half> &src, float beta);
+template SimpleTensor<uint8_t> softmax_layer(const SimpleTensor<uint8_t> &src, float beta);
} // namespace reference
} // namespace validation
} // namespace test
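The quantized softmax is now the templated overload itself: dequantize to float, run the float reference, requantize with the fixed output quantization (scale = 1/256, offset = 0). A sketch of the per-element conversions behind convert_from_asymmetric / convert_to_asymmetric, assuming the usual QASYMM8 rule real = scale * (q - offset):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    float dequantize_qasymm8(uint8_t q, float scale, int offset)
    {
        return scale * (static_cast<int>(q) - offset);
    }

    uint8_t quantize_qasymm8(float value, float scale, int offset)
    {
        const int q = static_cast<int>(std::lround(value / scale)) + offset;
        return static_cast<uint8_t>(std::max(0, std::min(255, q)));
    }

    // Softmax probabilities lie in [0, 1], so each output element is stored as
    // quantize_qasymm8(p, 1.f / 256, 0).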
diff --git a/tests/validation/reference/SoftmaxLayer.h b/tests/validation/reference/SoftmaxLayer.h
index a6d4c3b8cf..21dca1e52b 100644
--- a/tests/validation/reference/SoftmaxLayer.h
+++ b/tests/validation/reference/SoftmaxLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,7 +38,7 @@ namespace reference
template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta);
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
+template <typename T, typename std::enable_if<std::is_same<T, uint8_t>::value, int>::type = 0>
SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta);
} // namespace reference
} // namespace validation
diff --git a/tests/validation/reference/Transpose.cpp b/tests/validation/reference/Transpose.cpp
index 736f37e4dc..348c7030cb 100644
--- a/tests/validation/reference/Transpose.cpp
+++ b/tests/validation/reference/Transpose.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#include "Transpose.h"
#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
namespace arm_compute
diff --git a/tests/validation/reference/WidthConcatenateLayer.cpp b/tests/validation/reference/WidthConcatenateLayer.cpp
index 5b89934df5..7a5ece8f5e 100644
--- a/tests/validation/reference/WidthConcatenateLayer.cpp
+++ b/tests/validation/reference/WidthConcatenateLayer.cpp
@@ -23,7 +23,6 @@
*/
#include "WidthConcatenateLayer.h"
-#include "tests/validation/FixedPoint.h"
#include "tests/validation/Helpers.h"
namespace arm_compute
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index 2bd30d53f8..49c07938bd 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -249,58 +249,6 @@ inline std::string to_string(const QuantizationInfo &quantization_info)
return str.str();
}
-/** Formatted output of the FixedPointOp type.
- *
- * @param[out] os Output stream.
- * @param[in] op Type to output.
- *
- * @return Modified output stream.
- */
-inline ::std::ostream &operator<<(::std::ostream &os, const FixedPointOp &op)
-{
- switch(op)
- {
- case FixedPointOp::ADD:
- os << "ADD";
- break;
- case FixedPointOp::SUB:
- os << "SUB";
- break;
- case FixedPointOp::MUL:
- os << "MUL";
- break;
- case FixedPointOp::EXP:
- os << "EXP";
- break;
- case FixedPointOp::LOG:
- os << "LOG";
- break;
- case FixedPointOp::INV_SQRT:
- os << "INV_SQRT";
- break;
- case FixedPointOp::RECIPROCAL:
- os << "RECIPROCAL";
- break;
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
-
- return os;
-}
-
-/** Formatted output of the FixedPointOp type.
- *
- * @param[in] op Type to output.
- *
- * @return Formatted string.
- */
-inline std::string to_string(const FixedPointOp &op)
-{
- std::stringstream str;
- str << op;
- return str.str();
-}
-
/** Formatted output of the activation function type.
*
* @param[out] os Output stream.
@@ -856,8 +804,7 @@ inline std::string to_string(const TensorInfo &info)
std::stringstream str;
str << "{Shape=" << info.tensor_shape() << ","
<< "Type=" << info.data_type() << ","
- << "Channels=" << info.num_channels() << ","
- << "FixedPointPos=" << info.fixed_point_position() << "}";
+ << "Channels=" << info.num_channels() << "}";
return str.str();
}
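After this change to_string(TensorInfo) no longer reports a FixedPointPos field. A small usage sketch (the exact shape formatting is assumed, not taken from this patch):

    #include <iostream>
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "utils/TypePrinter.h"

    using namespace arm_compute;

    int main()
    {
        const TensorInfo info(TensorShape(23U, 27U, 5U), 1, DataType::F32);
        // Prints something like: {Shape=23x27x5,Type=F32,Channels=1}
        std::cout << to_string(info) << std::endl;
    }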