Diffstat (limited to 'tests/validation')
-rw-r--r--tests/validation/CL/ActivationLayer.cpp3
-rw-r--r--tests/validation/CL/ArgMinMax.cpp169
-rw-r--r--tests/validation/CL/ArithmeticAddition.cpp154
-rw-r--r--tests/validation/CL/ArithmeticDivision.cpp52
-rw-r--r--tests/validation/CL/ArithmeticSubtraction.cpp89
-rw-r--r--tests/validation/CL/BatchNormalizationLayer.cpp4
-rw-r--r--tests/validation/CL/BatchToSpaceLayer.cpp88
-rw-r--r--tests/validation/CL/Cast.cpp52
-rw-r--r--tests/validation/CL/Col2Im.cpp20
-rw-r--r--tests/validation/CL/Comparisons.cpp6
-rw-r--r--tests/validation/CL/Convolution3D.cpp300
-rw-r--r--tests/validation/CL/ConvolutionLayer.cpp323
-rw-r--r--tests/validation/CL/DeconvolutionLayer.cpp240
-rw-r--r--tests/validation/CL/DepthConvertLayer.cpp4
-rw-r--r--tests/validation/CL/DepthwiseConvolutionLayer.cpp851
-rw-r--r--tests/validation/CL/DepthwiseConvolutionLayerNative.cpp442
-rw-r--r--tests/validation/CL/DilatedConvolutionLayer.cpp11
-rw-r--r--tests/validation/CL/DirectConvolutionLayer.cpp407
-rw-r--r--tests/validation/CL/ElementwiseMax.cpp63
-rw-r--r--tests/validation/CL/ElementwiseMin.cpp63
-rw-r--r--tests/validation/CL/ElementwisePower.cpp44
-rw-r--r--tests/validation/CL/ElementwiseSquaredDiff.cpp59
-rw-r--r--tests/validation/CL/FullyConnectedLayer.cpp225
-rw-r--r--tests/validation/CL/GEMM.cpp52
-rw-r--r--tests/validation/CL/GEMMLowp.cpp274
-rw-r--r--tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp6
-rw-r--r--tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp8
-rw-r--r--tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp8
-rw-r--r--tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL.cpp206
-rw-r--r--tests/validation/CL/GEMMMatrixMultiply.cpp339
-rw-r--r--tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp334
-rw-r--r--tests/validation/CL/GEMMMatrixMultiplyNative.cpp7
-rw-r--r--tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp140
-rw-r--r--tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp10
-rw-r--r--tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRhsMMUL.cpp232
-rw-r--r--tests/validation/CL/GEMMReshapeLHSMatrix.cpp41
-rw-r--r--tests/validation/CL/GEMMReshapeRHSMatrix.cpp2
-rw-r--r--tests/validation/CL/Gather.cpp61
-rw-r--r--tests/validation/CL/Im2Col.cpp42
-rw-r--r--tests/validation/CL/IndirectConv2dAddressPrecalculation.cpp89
-rw-r--r--tests/validation/CL/IndirectConvolutionLayer.cpp268
-rw-r--r--tests/validation/CL/MatMul.cpp232
-rw-r--r--tests/validation/CL/MatMulKernel.cpp650
-rw-r--r--tests/validation/CL/MatMulLowpNativeKernel.cpp411
-rw-r--r--tests/validation/CL/MatMulLowpNativeMMULKernel.cpp394
-rw-r--r--tests/validation/CL/MatMulNativeMMULKernel.cpp501
-rw-r--r--tests/validation/CL/MaxUnpoolingLayer.cpp7
-rw-r--r--tests/validation/CL/PReluLayer.cpp9
-rw-r--r--tests/validation/CL/PadLayer.cpp61
-rw-r--r--tests/validation/CL/PixelWiseMultiplication.cpp50
-rw-r--r--tests/validation/CL/Pooling3dLayer.cpp345
-rw-r--r--tests/validation/CL/PoolingLayer.cpp37
-rw-r--r--tests/validation/CL/ReduceMean.cpp4
-rw-r--r--tests/validation/CL/Remap.cpp77
-rw-r--r--tests/validation/CL/Reverse.cpp52
-rw-r--r--tests/validation/CL/RsqrtLayer.cpp35
-rw-r--r--tests/validation/CL/Scale.cpp21
-rw-r--r--tests/validation/CL/ScatterLayer.cpp298
-rw-r--r--tests/validation/CL/Tile.cpp3
-rw-r--r--tests/validation/CL/Transpose.cpp25
-rw-r--r--tests/validation/CL/UNIT/DynamicTensor.cpp2
-rw-r--r--tests/validation/CL/UNIT/Multithreaded.cpp113
-rw-r--r--tests/validation/CL/WeightsReshape.cpp20
-rw-r--r--tests/validation/CL/Winograd.cpp599
-rw-r--r--tests/validation/CMakeLists.txt146
-rw-r--r--tests/validation/CPP/DFT.cpp4
-rw-r--r--tests/validation/Helpers.cpp226
-rw-r--r--tests/validation/Helpers.h124
-rw-r--r--tests/validation/NEON/ActivationLayer.cpp108
-rw-r--r--tests/validation/NEON/AddMulAdd.cpp234
-rw-r--r--tests/validation/NEON/ArgMinMax.cpp164
-rw-r--r--tests/validation/NEON/ArithmeticAddition.cpp190
-rw-r--r--tests/validation/NEON/ArithmeticSubtraction.cpp112
-rw-r--r--tests/validation/NEON/BatchNormalizationLayer.cpp2
-rw-r--r--tests/validation/NEON/BatchToSpaceLayer.cpp86
-rw-r--r--tests/validation/NEON/Cast.cpp77
-rw-r--r--tests/validation/NEON/Col2Im.cpp8
-rw-r--r--tests/validation/NEON/Convolution3D.cpp228
-rw-r--r--tests/validation/NEON/ConvolutionLayer.cpp1123
-rw-r--r--tests/validation/NEON/DeconvolutionLayer.cpp217
-rw-r--r--tests/validation/NEON/DepthConvertLayer.cpp28
-rw-r--r--tests/validation/NEON/DepthwiseConvolutionLayer.cpp847
-rw-r--r--tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp43
-rw-r--r--tests/validation/NEON/DilatedConvolutionLayer.cpp16
-rw-r--r--tests/validation/NEON/DirectConvolutionLayer.cpp88
-rw-r--r--tests/validation/NEON/ElementwiseAbsoluteValue.cpp38
-rw-r--r--tests/validation/NEON/ElementwiseDivision.cpp37
-rw-r--r--tests/validation/NEON/ElementwiseExpLayer.cpp40
-rw-r--r--tests/validation/NEON/ElementwiseKernelSelection.cpp159
-rw-r--r--tests/validation/NEON/ElementwiseLog.cpp41
-rw-r--r--tests/validation/NEON/ElementwiseMax.cpp59
-rw-r--r--tests/validation/NEON/ElementwiseMin.cpp62
-rw-r--r--tests/validation/NEON/ElementwiseNegation.cpp38
-rw-r--r--tests/validation/NEON/ElementwisePower.cpp29
-rw-r--r--tests/validation/NEON/ElementwiseRound.cpp37
-rw-r--r--tests/validation/NEON/ElementwiseRsqrtLayer.cpp38
-rw-r--r--tests/validation/NEON/ElementwiseSin.cpp33
-rw-r--r--tests/validation/NEON/ElementwiseSquareDiff.cpp56
-rw-r--r--tests/validation/NEON/FillBorder.cpp12
-rw-r--r--tests/validation/NEON/Floor.cpp29
-rw-r--r--tests/validation/NEON/FullyConnectedLayer.cpp425
-rw-r--r--tests/validation/NEON/GEMM.cpp328
-rw-r--r--tests/validation/NEON/GEMMLowp.cpp612
-rw-r--r--tests/validation/NEON/Gather.cpp12
-rw-r--r--tests/validation/NEON/Im2Col.cpp49
-rw-r--r--tests/validation/NEON/LSTMLayerQuantized.cpp6
-rw-r--r--tests/validation/NEON/MatMul.cpp467
-rw-r--r--tests/validation/NEON/MaxUnpoolingLayer.cpp39
-rw-r--r--tests/validation/NEON/MeanStdDevNormalizationLayer.cpp19
-rw-r--r--tests/validation/NEON/PixelWiseMultiplication.cpp61
-rw-r--r--tests/validation/NEON/Pooling3dLayer.cpp361
-rw-r--r--tests/validation/NEON/PoolingLayer.cpp96
-rw-r--r--tests/validation/NEON/QuantizationLayer.cpp30
-rw-r--r--tests/validation/NEON/RNNLayer.cpp4
-rw-r--r--tests/validation/NEON/ReduceMean.cpp9
-rw-r--r--tests/validation/NEON/Remap.cpp77
-rw-r--r--tests/validation/NEON/ReorderLayer.cpp107
-rw-r--r--tests/validation/NEON/ReshapeLayer.cpp49
-rw-r--r--tests/validation/NEON/Reverse.cpp52
-rw-r--r--tests/validation/NEON/Scale.cpp140
-rw-r--r--tests/validation/NEON/SoftmaxLayer.cpp276
-rw-r--r--tests/validation/NEON/StackLayer.cpp211
-rw-r--r--tests/validation/NEON/UNIT/RuntimeContext.cpp20
-rw-r--r--tests/validation/NEON/UNIT/TensorAllocator.cpp4
-rw-r--r--tests/validation/UNIT/CPPScheduler.cpp91
-rw-r--r--tests/validation/UNIT/GPUTarget.cpp22
-rw-r--r--tests/validation/UNIT/SubTensorInfo.cpp11
-rw-r--r--tests/validation/UNIT/TensorInfo.cpp13
-rw-r--r--tests/validation/Validation.h27
-rw-r--r--tests/validation/cpu/unit/Context.cpp10
-rw-r--r--tests/validation/dynamic_fusion/Utils.h73
-rw-r--r--tests/validation/dynamic_fusion/gpu/Integration.cpp642
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Add.cpp264
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Cast.cpp97
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp184
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp474
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp260
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp335
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Mul.cpp221
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp219
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp147
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Resize.cpp359
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Sigmoid.cpp154
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp219
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Sub.cpp262
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Tanh.cpp154
-rw-r--r--tests/validation/fixtures/ActivationLayerFixture.h22
-rw-r--r--tests/validation/fixtures/AddMulAddFixture.h270
-rw-r--r--tests/validation/fixtures/ArgMinMaxFixture.h55
-rw-r--r--tests/validation/fixtures/ArithmeticDivisionFixture.h4
-rw-r--r--tests/validation/fixtures/ArithmeticOperationsFixture.h182
-rw-r--r--tests/validation/fixtures/BatchNormalizationLayerFixture.h3
-rw-r--r--tests/validation/fixtures/BatchNormalizationLayerFusionFixture.h3
-rw-r--r--tests/validation/fixtures/BatchToSpaceLayerFixture.h46
-rw-r--r--tests/validation/fixtures/BitwiseAndFixture.h3
-rw-r--r--tests/validation/fixtures/BitwiseNotFixture.h3
-rw-r--r--tests/validation/fixtures/BitwiseOrFixture.h3
-rw-r--r--tests/validation/fixtures/BitwiseXorFixture.h3
-rw-r--r--tests/validation/fixtures/BoundingBoxTransformFixture.h5
-rw-r--r--tests/validation/fixtures/CastFixture.h13
-rw-r--r--tests/validation/fixtures/ChannelShuffleLayerFixture.h3
-rw-r--r--tests/validation/fixtures/Col2ImFixture.h14
-rw-r--r--tests/validation/fixtures/ComparisonFixture.h7
-rw-r--r--tests/validation/fixtures/ComputeAllAnchorsFixture.h5
-rw-r--r--tests/validation/fixtures/ConcatenateLayerFixture.h3
-rw-r--r--tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h5
-rw-r--r--tests/validation/fixtures/ConvolutionLayerFixture.h539
-rw-r--r--tests/validation/fixtures/CopyFixture.h3
-rw-r--r--tests/validation/fixtures/CropResizeFixture.h3
-rw-r--r--tests/validation/fixtures/DeconvolutionLayerFixture.h17
-rw-r--r--tests/validation/fixtures/DepthConvertLayerFixture.h7
-rw-r--r--tests/validation/fixtures/DepthToSpaceLayerFixture.h3
-rw-r--r--tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h332
-rw-r--r--tests/validation/fixtures/DequantizationLayerFixture.h3
-rw-r--r--tests/validation/fixtures/DirectConvolution3DFixture.h189
-rw-r--r--tests/validation/fixtures/DirectConvolutionLayerFixture.h150
-rw-r--r--tests/validation/fixtures/DropoutLayerFixture.h3
-rw-r--r--tests/validation/fixtures/ElementwiseOperationsFixture.h261
-rw-r--r--tests/validation/fixtures/ElementwiseUnaryFixture.h246
-rw-r--r--tests/validation/fixtures/FFTFixture.h9
-rw-r--r--tests/validation/fixtures/FillFixture.h3
-rw-r--r--tests/validation/fixtures/FlattenLayerFixture.h3
-rw-r--r--tests/validation/fixtures/FloorFixture.h3
-rw-r--r--tests/validation/fixtures/FullyConnectedLayerFixture.h372
-rw-r--r--tests/validation/fixtures/FuseBatchNormalizationFixture.h3
-rw-r--r--tests/validation/fixtures/GEMMFixture.h342
-rw-r--r--tests/validation/fixtures/GEMMInterleave4x4Fixture.h10
-rw-r--r--tests/validation/fixtures/GEMMLowpFixture.h927
-rw-r--r--tests/validation/fixtures/GEMMReshapeLHSMatrixFixture.h3
-rw-r--r--tests/validation/fixtures/GEMMReshapeRHSMatrixFixture.h3
-rw-r--r--tests/validation/fixtures/GEMMTranspose1xWFixture.h8
-rw-r--r--tests/validation/fixtures/GatherFixture.h11
-rw-r--r--tests/validation/fixtures/Im2ColFixture.h14
-rw-r--r--tests/validation/fixtures/IndirectConv2dAddressPrecalculationFixture.h122
-rw-r--r--tests/validation/fixtures/InstanceNormalizationLayerFixture.h3
-rw-r--r--tests/validation/fixtures/L2NormalizeLayerFixture.h3
-rw-r--r--tests/validation/fixtures/LSTMLayerFixture.h10
-rw-r--r--tests/validation/fixtures/LogicalFixture.h4
-rw-r--r--tests/validation/fixtures/MatMulFixture.h612
-rw-r--r--tests/validation/fixtures/MatMulKernelFixture.h390
-rw-r--r--tests/validation/fixtures/MaxUnpoolingLayerFixture.h4
-rw-r--r--tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h40
-rw-r--r--tests/validation/fixtures/NonMaxSuppressionFixture.h3
-rw-r--r--tests/validation/fixtures/NormalizationLayerFixture.h4
-rw-r--r--tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h5
-rw-r--r--tests/validation/fixtures/PadLayerFixture.h3
-rw-r--r--tests/validation/fixtures/PermuteFixture.h3
-rw-r--r--tests/validation/fixtures/PixelWiseMultiplicationFixture.h54
-rw-r--r--tests/validation/fixtures/Pooling3dLayerFixture.h164
-rw-r--r--tests/validation/fixtures/PoolingLayerFixture.h16
-rw-r--r--tests/validation/fixtures/PriorBoxLayerFixture.h3
-rw-r--r--tests/validation/fixtures/QLSTMLayerNormalizationFixture.h3
-rw-r--r--tests/validation/fixtures/QuantizationLayerFixture.h4
-rw-r--r--tests/validation/fixtures/RNNLayerFixture.h3
-rw-r--r--tests/validation/fixtures/ROIAlignLayerFixture.h5
-rw-r--r--tests/validation/fixtures/ROIPoolingLayerFixture.h5
-rw-r--r--tests/validation/fixtures/RangeFixture.h3
-rw-r--r--tests/validation/fixtures/ReduceMeanFixture.h15
-rw-r--r--tests/validation/fixtures/ReductionOperationFixture.h11
-rw-r--r--tests/validation/fixtures/RemapFixture.h168
-rw-r--r--tests/validation/fixtures/ReorderFixture.h123
-rw-r--r--tests/validation/fixtures/ReorgLayerFixture.h3
-rw-r--r--tests/validation/fixtures/ReshapeLayerFixture.h43
-rw-r--r--tests/validation/fixtures/ReverseFixture.h52
-rw-r--r--tests/validation/fixtures/ScaleFixture.h80
-rw-r--r--tests/validation/fixtures/ScatterLayerFixture.h254
-rw-r--r--tests/validation/fixtures/ScharrFixture.h3
-rw-r--r--tests/validation/fixtures/SelectFixture.h3
-rw-r--r--tests/validation/fixtures/SliceOperationsFixtures.h4
-rw-r--r--tests/validation/fixtures/SoftmaxLayerFixture.h5
-rw-r--r--tests/validation/fixtures/SpaceToBatchFixture.h5
-rw-r--r--tests/validation/fixtures/SpaceToDepthFixture.h10
-rw-r--r--tests/validation/fixtures/SplitFixture.h4
-rw-r--r--tests/validation/fixtures/StackLayerFixture.h37
-rw-r--r--tests/validation/fixtures/TileFixture.h3
-rw-r--r--tests/validation/fixtures/TransposeFixture.h13
-rw-r--r--tests/validation/fixtures/UNIT/DynamicTensorFixture.h5
-rw-r--r--tests/validation/fixtures/UNIT/WeightsRetentionFixture.h20
-rw-r--r--tests/validation/fixtures/UnstackFixture.h3
-rw-r--r--tests/validation/fixtures/WeightsReshapeFixture.h19
-rw-r--r--tests/validation/fixtures/WinogradConvolutionLayerFixture.h14
-rw-r--r--tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h246
-rw-r--r--tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h411
-rw-r--r--tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h273
-rw-r--r--tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h297
-rw-r--r--tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h188
-rw-r--r--tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h207
-rw-r--r--tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h186
-rw-r--r--tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h171
-rw-r--r--tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h239
-rw-r--r--tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h137
-rw-r--r--tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h272
-rw-r--r--tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h158
-rw-r--r--tests/validation/reference/ActivationLayer.cpp27
-rw-r--r--tests/validation/reference/ActivationLayer.h29
-rw-r--r--tests/validation/reference/BatchToSpaceLayer.cpp51
-rw-r--r--tests/validation/reference/BatchToSpaceLayer.h5
-rw-r--r--tests/validation/reference/Conv3D.cpp260
-rw-r--r--tests/validation/reference/Conv3D.h47
-rw-r--r--tests/validation/reference/Convolution3d.h4
-rw-r--r--tests/validation/reference/DFT.cpp8
-rw-r--r--tests/validation/reference/DepthConvertLayer.cpp23
-rw-r--r--tests/validation/reference/DequantizationLayer.cpp9
-rw-r--r--tests/validation/reference/ElementwiseOperations.cpp11
-rw-r--r--tests/validation/reference/ElementwiseUnary.cpp109
-rw-r--r--tests/validation/reference/ElementwiseUnary.h4
-rw-r--r--tests/validation/reference/FullyConnectedLayer.cpp4
-rw-r--r--tests/validation/reference/GEMM.cpp95
-rw-r--r--tests/validation/reference/GEMM.h11
-rw-r--r--tests/validation/reference/GEMMLowp.cpp12
-rw-r--r--tests/validation/reference/GEMMLowp.h11
-rw-r--r--tests/validation/reference/Gather.cpp53
-rw-r--r--tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp98
-rw-r--r--tests/validation/reference/IndirectConv2dAddressPrecalculation.h44
-rw-r--r--tests/validation/reference/MeanStdDevNormalizationLayer.cpp11
-rw-r--r--tests/validation/reference/Permute.cpp18
-rw-r--r--tests/validation/reference/Pooling3dLayer.cpp220
-rw-r--r--tests/validation/reference/Pooling3dLayer.h50
-rw-r--r--tests/validation/reference/PoolingLayer.cpp21
-rw-r--r--tests/validation/reference/QuantizationLayer.cpp2
-rw-r--r--tests/validation/reference/ReductionOperation.cpp79
-rw-r--r--tests/validation/reference/ReductionOperation.h7
-rw-r--r--tests/validation/reference/Remap.cpp112
-rw-r--r--tests/validation/reference/Reorder.cpp156
-rw-r--r--tests/validation/reference/Reorder.h (renamed from tests/validation/reference/Remap.h)12
-rw-r--r--tests/validation/reference/ReshapeLayer.cpp15
-rw-r--r--tests/validation/reference/Reverse.cpp35
-rw-r--r--tests/validation/reference/Reverse.h10
-rw-r--r--tests/validation/reference/Scale.cpp20
-rw-r--r--tests/validation/reference/Scale.h4
-rw-r--r--tests/validation/reference/ScatterLayer.cpp152
-rw-r--r--tests/validation/reference/ScatterLayer.h48
-rw-r--r--tests/validation/reference/UtilsQuantizedAsymm.h28
293 files changed, 27711 insertions(+), 5676 deletions(-)
diff --git a/tests/validation/CL/ActivationLayer.cpp b/tests/validation/CL/ActivationLayer.cpp
index fa95594157..133b39d154 100644
--- a/tests/validation/CL/ActivationLayer.cpp
+++ b/tests/validation/CL/ActivationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -70,6 +70,7 @@ AbsoluteTolerance<float> tolerance(ActivationLayerInfo::ActivationFunction activ
case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
case ActivationLayerInfo::ActivationFunction::ELU:
case ActivationLayerInfo::ActivationFunction::SQRT:
+ case ActivationLayerInfo::ActivationFunction::GELU:
return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.01f : 0.00001f);
case ActivationLayerInfo::ActivationFunction::TANH:
return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : 0.00001f);
diff --git a/tests/validation/CL/ArgMinMax.cpp b/tests/validation/CL/ArgMinMax.cpp
index 1d849ed0c7..8566972f81 100644
--- a/tests/validation/CL/ArgMinMax.cpp
+++ b/tests/validation/CL/ArgMinMax.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,15 +22,11 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h"
-#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
#include "tests/CL/CLAccessor.h"
#include "tests/datasets/ShapeDatasets.h"
-#include "tests/datasets/SplitDataset.h"
-#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/ArgMinMaxFixture.h"
@@ -46,6 +42,8 @@ namespace
const auto ArgMinMaxSmallDataset = framework::dataset::make("Shape",
{
TensorShape{ 1U, 7U, 1U, 3U },
+ TensorShape{ 3U, 1U, 3U, 2U },
+ TensorShape{ 2U, 1U, 3U, 2U },
TensorShape{ 149U, 5U, 1U, 2U },
TensorShape{ 166U, 5U, 1U, 2U },
TensorShape{ 322U, 5U, 1U, 2U },
@@ -53,6 +51,22 @@ const auto ArgMinMaxSmallDataset = framework::dataset::make("Shape",
TensorShape{ 2560, 2U, 2U, 2U },
});
+const auto ArgMinMaxSmallDatasetAxis0 = framework::dataset::make("Shape",
+{
+ TensorShape{ 1U, 5U },
+ TensorShape{ 2U, 3U },
+ TensorShape{ 1U },
+ TensorShape{ 3U },
+ TensorShape{ 2U },
+ TensorShape{ 5U },
+ TensorShape{ 17U },
+ TensorShape{ 15U, 2U },
+});
+
+const auto OpsDataset = framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX });
+const auto AxisDataset = framework::dataset::make("Axis", { 0, 1, 2, 3 });
+const auto QInfoDataset = framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) });
+
const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape",
{ TensorShape{ 517U, 123U, 13U, 2U } });
} // namespace
@@ -85,47 +99,78 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
// clang-format on
// *INDENT-ON*
-template <typename T>
-using CLArgMinMaxValidationFixture = ArgMinMaxValidationFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T>;
+template <typename T1, typename T2>
+using CLArgMinMaxValidationFixture = ArgMinMaxValidationFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T1, T2>;
+
+using CLArgMinMaxValidationFixture_S32_S32 = CLArgMinMaxValidationFixture<int32_t, int32_t>;
+using CLArgMinMaxValidationFixture_F16_S32 = CLArgMinMaxValidationFixture<half, int32_t>;
+using CLArgMinMaxValidationFixture_F32_S32 = CLArgMinMaxValidationFixture<float, int32_t>;
+using CLArgMinMaxValidationFixture_F32_S64 = CLArgMinMaxValidationFixture<float, int64_t>;
TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmallAxis0,
+ CLArgMinMaxValidationFixture_S32_S32,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(ArgMinMaxSmallDatasetAxis0,
+ framework::dataset::make("DataTypeIn", DataType::S32)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ framework::dataset::make("Axis", { 0 })),
+ OpsDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
FIXTURE_DATA_TEST_CASE(RunSmall,
- CLArgMinMaxValidationFixture<int32_t>,
+ CLArgMinMaxValidationFixture_S32_S32,
framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+ combine(combine(combine(combine(ArgMinMaxSmallDataset,
+ framework::dataset::make("DataTypeIn", DataType::S32)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge,
- CLArgMinMaxValidationFixture<int32_t>,
+ CLArgMinMaxValidationFixture_S32_S32,
framework::DatasetMode::NIGHTLY,
- combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+ combine(combine(combine(combine(ArgMinMaxLargeDataset,
+ framework::dataset::make("DataTypeIn", DataType::S32)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
+
TEST_SUITE_END() // S32
TEST_SUITE(Float)
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall,
- CLArgMinMaxValidationFixture<half>,
+ CLArgMinMaxValidationFixture_F16_S32,
framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+ combine(combine(combine(combine(ArgMinMaxSmallDataset,
+ framework::dataset::make("DataTypeIn", DataType::F16)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge,
- CLArgMinMaxValidationFixture<half>,
+ CLArgMinMaxValidationFixture_F16_S32,
framework::DatasetMode::NIGHTLY,
- combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+ combine(combine(combine(combine(ArgMinMaxLargeDataset,
+ framework::dataset::make("DataTypeIn", DataType::F16)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -134,49 +179,77 @@ TEST_SUITE_END() // FP16
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall,
- CLArgMinMaxValidationFixture<float>,
+ CLArgMinMaxValidationFixture_F32_S32,
framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+ combine(combine(combine(combine(ArgMinMaxSmallDataset,
+ framework::dataset::make("DataTypeIn", DataType::F32)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall_F32_S64,
+ CLArgMinMaxValidationFixture_F32_S64,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(ArgMinMaxSmallDataset,
+ framework::dataset::make("DataTypeIn", DataType::F32)),
+ framework::dataset::make("DataTypeOut", DataType::S64)),
+ AxisDataset),
+ OpsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge,
- CLArgMinMaxValidationFixture<float>,
+ CLArgMinMaxValidationFixture_F32_S32,
framework::DatasetMode::NIGHTLY,
- combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+ combine(combine(combine(combine(ArgMinMaxLargeDataset,
+ framework::dataset::make("DataTypeIn", DataType::F32)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
+
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
-template <typename T>
-using CLArgMinMaxQuantizedValidationFixture = ArgMinMaxValidationQuantizedFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T>;
+template <typename T1, typename T2>
+using CLArgMinMaxQuantizedValidationFixture = ArgMinMaxValidationQuantizedFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T1, T2>;
+
+using CLArgMinMaxQuantizedValidationFixture_U8_S32 = CLArgMinMaxQuantizedValidationFixture<uint8_t, int32_t>;
+using CLArgMinMaxQuantizedValidationFixture_S8_S32 = CLArgMinMaxQuantizedValidationFixture<int8_t, int32_t>;
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall,
- CLArgMinMaxQuantizedValidationFixture<uint8_t>,
+ CLArgMinMaxQuantizedValidationFixture_U8_S32,
framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })))
+ combine(combine(combine(combine(combine(ArgMinMaxSmallDataset,
+ framework::dataset::make("DataTypeIn", DataType::QASYMM8)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset),
+ QInfoDataset))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-
FIXTURE_DATA_TEST_CASE(RunLarge,
- CLArgMinMaxQuantizedValidationFixture<uint8_t>,
+ CLArgMinMaxQuantizedValidationFixture_U8_S32,
framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })))
+ combine(combine(combine(combine(combine(ArgMinMaxLargeDataset,
+ framework::dataset::make("DataTypeIn", DataType::QASYMM8)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset),
+ QInfoDataset))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -185,28 +258,32 @@ TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
FIXTURE_DATA_TEST_CASE(RunSmall,
- CLArgMinMaxQuantizedValidationFixture<int8_t>,
+ CLArgMinMaxQuantizedValidationFixture_S8_S32,
framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })))
+ combine(combine(combine(combine(combine(ArgMinMaxSmallDataset,
+ framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset),
+ QInfoDataset))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-
FIXTURE_DATA_TEST_CASE(RunLarge,
- CLArgMinMaxQuantizedValidationFixture<int8_t>,
+ CLArgMinMaxQuantizedValidationFixture_S8_S32,
framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })))
+ combine(combine(combine(combine(combine(ArgMinMaxLargeDataset,
+ framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset),
+ QInfoDataset))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
TEST_SUITE_END() // QASYMM8_SIGNED
-
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // ArgMinMax
TEST_SUITE_END() // CL
diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp
index c74f6a3b23..1ed3a105dc 100644
--- a/tests/validation/CL/ArithmeticAddition.cpp
+++ b/tests/validation/CL/ArithmeticAddition.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,26 +41,12 @@ namespace test
{
namespace validation
{
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/Add.cpp from the dynamic fusion interface.
+ * Please check there for any differences in the coverage
+ */
namespace
{
/** Input data sets **/
-const auto ArithmeticAdditionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType",
- DataType::U8));
-const auto ArithmeticAdditionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("DataType",
- DataType::QASYMM8));
-const auto ArithmeticAdditionQASYMM8SignedDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- framework::dataset::make("DataType",
- DataType::QASYMM8_SIGNED));
-const auto ArithmeticAdditionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
- framework::dataset::make("DataType",
- DataType::QSYMM16));
-const auto ArithmeticAdditionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
- framework::dataset::make("DataType", DataType::S16));
-const auto ArithmeticAdditionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
- framework::dataset::make("DataType", DataType::F16));
-const auto ArithmeticAdditionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("DataType", DataType::F32));
const auto EmptyActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
{ ActivationLayerInfo() });
const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
@@ -68,6 +54,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
});
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
} // namespace
TEST_SUITE(CL)
@@ -76,22 +64,19 @@ TEST_SUITE(ArithmeticAddition)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
- framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false})),
+ framework::dataset::make("Expected", { true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLArithmeticAddition::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS);
@@ -129,8 +114,10 @@ using CLArithmeticAdditionFixture = ArithmeticAdditionValidationFixture<CLTensor
TEST_SUITE(Integer)
TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionU8Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::U8)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -138,15 +125,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework
TEST_SUITE_END() // U8
TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::S16)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticAdditionS16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+ DataType::S16)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -159,48 +150,65 @@ using CLArithmeticAdditionQuantizedFixture = ArithmeticAdditionValidationQuantiz
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
- ArithmeticAdditionQASYMM8Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
template <typename T>
using CLArithmeticAdditionBroadcastQuantizedFixture = ArithmeticAdditionValidationQuantizedBroadcastFixture<CLTensor, CLAccessor, CLArithmeticAddition, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
- ArithmeticAdditionQASYMM8Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
- framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
- framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, CLArithmeticAdditionBroadcastQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 255.f, 10) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(1.f / 255.f, 10) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 10) })),
+ InPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
- ArithmeticAdditionQASYMM8SignedDataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 10) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE(QSYMM16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
- ArithmeticAdditionQSYMM16Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QSYMM16)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -213,16 +221,21 @@ using CLArithmeticAdditionFloatFixture = ArithmeticAdditionValidationFloatFixtur
TEST_SUITE(Float)
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::F16)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- EmptyActivationFunctionsDataset))
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ArithmeticAdditionFP16Dataset),
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapes(),
+ framework::dataset::make("DataType",
+ DataType::F16)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- ActivationFunctionsDataset))
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -230,24 +243,32 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<half>
TEST_SUITE_END() // FP16
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType",
+ DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- EmptyActivationFunctionsDataset))
+ EmptyActivationFunctionsDataset),
+ InPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ArithmeticAdditionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapes(),
+ framework::dataset::make("DataType",
+ DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- ActivationFunctionsDataset))
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticAdditionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+ DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- EmptyActivationFunctionsDataset))
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -256,27 +277,30 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFloatFixture<float>, framew
template <typename T>
using CLArithmeticAdditionBroadcastFloatFixture = ArithmeticAdditionBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLArithmeticAddition, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapesBroadcast(),
- ArithmeticAdditionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
+ framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- EmptyActivationFunctionsDataset))
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
- ArithmeticAdditionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapesBroadcast(),
+ framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- ActivationFunctionsDataset))
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapesBroadcast(),
- ArithmeticAdditionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapesBroadcast(),
+ framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- EmptyActivationFunctionsDataset))
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
diff --git a/tests/validation/CL/ArithmeticDivision.cpp b/tests/validation/CL/ArithmeticDivision.cpp
index 9dcdfb83e1..94bacba7e5 100644
--- a/tests/validation/CL/ArithmeticDivision.cpp
+++ b/tests/validation/CL/ArithmeticDivision.cpp
@@ -59,6 +59,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("Activatio
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
});
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
} // namespace
TEST_SUITE(CL)
@@ -95,14 +97,16 @@ using CLArithmeticDivisionIntegerFixture = ArithmeticDivisionValidationIntegerFi
TEST_SUITE(Integer)
TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmallInteger, CLArithmeticDivisionIntegerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticDivisionS32Dataset),
- EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmallInteger, CLArithmeticDivisionIntegerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticDivisionS32Dataset),
+ EmptyActivationFunctionsDataset),
+ InPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunIntegerWithActivation, CLArithmeticDivisionIntegerFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticDivisionS32Dataset),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunIntegerWithActivation, CLArithmeticDivisionIntegerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticDivisionS32Dataset),
+ ActivationFunctionsDataset),
+ InPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -116,14 +120,16 @@ using CLArithmeticDivisionFloatFixture = ArithmeticDivisionValidationFloatFixtur
TEST_SUITE(Float)
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticDivisionFP16Dataset),
- EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticDivisionFP16Dataset),
+ EmptyActivationFunctionsDataset),
+ InPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ArithmeticDivisionFP16Dataset),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ArithmeticDivisionFP16Dataset),
+ ActivationFunctionsDataset),
+ InPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
@@ -131,21 +137,24 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<half>
TEST_SUITE_END()
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticDivisionFP32Dataset),
- EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticDivisionFP32Dataset),
+ EmptyActivationFunctionsDataset),
+ InPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ArithmeticDivisionFP32Dataset),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ArithmeticDivisionFP32Dataset),
+ ActivationFunctionsDataset),
+ InPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticDivisionFP32Dataset),
- EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticDivisionFP32Dataset),
+ EmptyActivationFunctionsDataset),
+ InPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
@@ -154,24 +163,27 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticDivisionFloatFixture<float>, framew
template <typename T>
using CLArithmeticDivisionBroadcastFloatFixture = ArithmeticDivisionBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLArithmeticDivision, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapesBroadcast(),
ArithmeticDivisionFP32Dataset),
- EmptyActivationFunctionsDataset))
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
ArithmeticDivisionFP32Dataset),
- ActivationFunctionsDataset))
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapesBroadcast(),
ArithmeticDivisionFP32Dataset),
- EmptyActivationFunctionsDataset))
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
diff --git a/tests/validation/CL/ArithmeticSubtraction.cpp b/tests/validation/CL/ArithmeticSubtraction.cpp
index 2709fcaedb..5825ce2e5d 100644
--- a/tests/validation/CL/ArithmeticSubtraction.cpp
+++ b/tests/validation/CL/ArithmeticSubtraction.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,27 +41,12 @@ namespace test
{
namespace validation
{
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/Sub.cpp from the dynamic fusion interface.
+ * Please check there for any differences in the coverage
+ */
namespace
{
/** Input data sets **/
-const auto ArithmeticSubtractionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)),
- framework::dataset::make("DataType",
- DataType::U8));
-const auto ArithmeticSubtractionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("DataType",
- DataType::QASYMM8));
-const auto ArithmeticSubtractionQASYMM8SignedDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- framework::dataset::make("DataType",
- DataType::QASYMM8_SIGNED));
-const auto ArithmeticSubtractionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
- framework::dataset::make("DataType",
- DataType::QSYMM16));
-const auto ArithmeticSubtractionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
- framework::dataset::make("DataType", DataType::S16));
-const auto ArithmeticSubtractionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
- framework::dataset::make("DataType", DataType::F16));
-const auto ArithmeticSubtractionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("DataType", DataType::F32));
const auto EmptyActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
{ ActivationLayerInfo() });
const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
@@ -79,22 +64,19 @@ TEST_SUITE(ArithmeticSubtraction)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
- framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false})),
+ framework::dataset::make("Expected", { true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLArithmeticSubtraction::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS);
@@ -159,7 +141,8 @@ using CLArithmeticSubtractionFixture = ArithmeticSubtractionValidationFixture<CL
TEST_SUITE(Integer)
TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionU8Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::U8)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
OutOfPlaceDataSet))
{
@@ -169,7 +152,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<uint8_t>, framew
TEST_SUITE_END() // U8
TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS16Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::S16)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
OutOfPlaceDataSet))
{
@@ -177,7 +161,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<int16_t>, framew
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS16Dataset),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+ DataType::S16)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
OutOfPlaceDataSet))
{
@@ -193,11 +178,22 @@ using CLArithmeticSubtractionQuantizedFixture = ArithmeticSubtractionValidationQ
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
- ArithmeticSubtractionQASYMM8Dataset),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunTinyInPlace, CLArithmeticSubtractionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::TinyShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 255.f, 20) })),
InPlaceDataSet))
{
// Validate output
@@ -206,12 +202,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<uint8_t
TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
- ArithmeticSubtractionQASYMM8SignedDataset),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 10) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
- InPlaceDataSet))
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -219,7 +215,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<int8_t>
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE(QSYMM16)
FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
- ArithmeticSubtractionQSYMM16Dataset),
+ framework::dataset::make("DataType", DataType::QSYMM16)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
@@ -237,7 +233,8 @@ using CLArithmeticSubtractionFloatFixture = ArithmeticSubtractionValidationFloat
TEST_SUITE(Float)
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionFP16Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::F16)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
EmptyActivationFunctionsDataset),
OutOfPlaceDataSet))
@@ -246,7 +243,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<half>, fram
validate(CLAccessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticSubtractionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapes(),
- ArithmeticSubtractionFP16Dataset),
+ framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
ActivationFunctionsDataset),
InPlaceDataSet))
@@ -258,7 +255,7 @@ TEST_SUITE_END() // FP16
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
- ArithmeticSubtractionFP32Dataset),
+ framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
EmptyActivationFunctionsDataset),
OutOfPlaceDataSet))
@@ -267,7 +264,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<float>, fra
validate(CLAccessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticSubtractionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapes(),
- ArithmeticSubtractionFP32Dataset),
+ framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
ActivationFunctionsDataset),
InPlaceDataSet))
@@ -277,7 +274,7 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticSubtractionFloatFixture<fl
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(),
- ArithmeticSubtractionFP32Dataset),
+ framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
EmptyActivationFunctionsDataset),
OutOfPlaceDataSet))
@@ -290,7 +287,7 @@ template <typename T>
using CLArithmeticSubtractionBroadcastFloatFixture = ArithmeticSubtractionBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLArithmeticSubtraction, T>;
FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticSubtractionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
- ArithmeticSubtractionFP32Dataset),
+ framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
EmptyActivationFunctionsDataset),
OutOfPlaceDataSet))
@@ -298,8 +295,18 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticSubtractionBroadcastFloatF
// Validate output
validate(CLAccessor(_target), _reference);
}
+FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInplace, CLArithmeticSubtractionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ EmptyActivationFunctionsDataset),
+ InPlaceDataSet))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticSubtractionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapesBroadcast(),
- ArithmeticSubtractionFP32Dataset),
+ framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
ActivationFunctionsDataset),
OutOfPlaceDataSet))
@@ -309,7 +316,7 @@ FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticSubtractionBroadc
}
FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticSubtractionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapesBroadcast(),
- ArithmeticSubtractionFP32Dataset),
+ framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
EmptyActivationFunctionsDataset),
OutOfPlaceDataSet))
diff --git a/tests/validation/CL/BatchNormalizationLayer.cpp b/tests/validation/CL/BatchNormalizationLayer.cpp
index 8b3bdbc3ea..3b87b9d1b5 100644
--- a/tests/validation/CL/BatchNormalizationLayer.cpp
+++ b/tests/validation/CL/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,7 +50,7 @@ namespace
{
RelativeTolerance<float> rel_tolerance_f32(0.05f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
constexpr AbsoluteTolerance<float> abs_tolerance_f32(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr AbsoluteTolerance<float> tolerance_f16(0.02f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
const auto act_infos = framework::dataset::make("ActivationInfo",
{
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
diff --git a/tests/validation/CL/BatchToSpaceLayer.cpp b/tests/validation/CL/BatchToSpaceLayer.cpp
index e90ac921c5..ca12b76e8a 100644
--- a/tests/validation/CL/BatchToSpaceLayer.cpp
+++ b/tests/validation/CL/BatchToSpaceLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,56 +50,38 @@ using CLBatchToSpaceLayerFixture = BatchToSpaceLayerValidationFixture<CLTensor,
// *INDENT-OFF*
// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blockx > blocky
- TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blocky > blockx
- TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), // Mismatching data types
- TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), // Wrong data type block shape
- TensorInfo(TensorShape(32U, 13U, 2U, 2U, 4U), 1, DataType::F32), // Wrong tensor shape
- }),
- framework::dataset::make("BlockShapeInfo",{ TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
- TensorInfo(TensorShape(2U, 4U), 1, DataType::S32),
- TensorInfo(TensorShape(4U, 2U), 1, DataType::S32),
- TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
- TensorInfo(TensorShape(2U, 2U), 1, DataType::F16),
- TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
- })),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32),
-
- TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F16),
- TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
- })),
- framework::dataset::make("Expected", { true, true,true, false, false, false})),
- input_info, block_shape_info, output_info, expected)
-{
- bool has_error = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), &block_shape_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false)));
- ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
-}
-DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip(
+DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),
- TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blockx > blocky
- TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blocky > blockx
- TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Mismatching data types
- TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Negative block shapes
- TensorInfo(TensorShape(32U, 16U, 2U, 4U, 4U), 1, DataType::F32), // Wrong tensor shape
+ TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Supported: blockx != blocky && blockx > blocky
+ TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Supported: blockx != blocky && blocky > blockx
+ TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Invalid: Mismatching data types
+ TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Invalid: Negative block shapes
+ TensorInfo(TensorShape(32U, 16U, 2U, 4U, 4U), 1, DataType::F32),// Unsupported tensor rank
+ TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Invalid output tensor shape (invalid batch dimension)
+ TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Invalid output tensor shape (invalid spatial dimension)
+ TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Supported: correct tensor shape with cropping
+ TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Invalid tensor shape with cropping
}),
- framework::dataset::make("BlockShapeX", { 2, 4, 2, 2, 2, 2 })),
- framework::dataset::make("BlockShapeY", { 2, 2, 4, 2, -2, 2 })),
+ framework::dataset::make("BlockShapeX", { 2, 4, 2, 2, 2, 2, 2, 2, 2, 2 })),
+ framework::dataset::make("BlockShapeY", { 2, 2, 4, 2, -2, 2, 2, 2, 2, 2 })),
+ framework::dataset::make("CropInfo", {
+ CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{3, 2, 1, 3}, CropInfo{3, 2, 1, 3}
+ })),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
- TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(64U, 16U, 2U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 32U, 2U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F16),
TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 8U, 2U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(33U, 32U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(27, 12U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 16U, 2U, 4U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true,true, false, false, false})),
- input_info, block_shape_x, block_shape_y, output_info, expected)
+ framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, true, false})),
+ input_info, block_shape_x, block_shape_y, crop_info, output_info, expected)
{
- bool has_error = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false)));
+ bool has_error = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false), crop_info));
ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
}
// clang-format on
@@ -114,6 +96,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchToSpaceLayerFixture<float>, framework::D
// Validate output
validate(CLAccessor(_target), _reference);
}
+
+FIXTURE_DATA_TEST_CASE(RunSmallWithCropping, CLBatchToSpaceLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(datasets::SmallBatchToSpaceLayerWithCroppingDataset(), framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
FIXTURE_DATA_TEST_CASE(RunLarge, CLBatchToSpaceLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
DataType::F32)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
@@ -131,6 +123,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchToSpaceLayerFixture<half>, framework::Da
// Validate output
validate(CLAccessor(_target), _reference);
}
+
+FIXTURE_DATA_TEST_CASE(RunSmallWithCropping, CLBatchToSpaceLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(datasets::SmallBatchToSpaceLayerWithCroppingDataset(), framework::dataset::make("DataType",
+ DataType::F16)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
FIXTURE_DATA_TEST_CASE(RunLarge, CLBatchToSpaceLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
DataType::F16)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
diff --git a/tests/validation/CL/Cast.cpp b/tests/validation/CL/Cast.cpp
index 2ca8b58040..2f943e84d8 100644
--- a/tests/validation/CL/Cast.cpp
+++ b/tests/validation/CL/Cast.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,6 +48,10 @@ constexpr AbsoluteTolerance<float> one_tolerance(1);
constexpr AbsoluteTolerance<float> zero_tolerance(0);
/** Input data sets **/
+// QASYMM8
+const auto CastQASYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F32));
+const auto CastQSYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QSYMM8), framework::dataset::make("DataType", DataType::F32));
+
// U8
const auto CastU8toS8Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S8));
const auto CastU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
@@ -119,6 +123,26 @@ const auto CastF32toS16Dataset = combine(framework::dataset::make("DataType", Da
const auto CastF32toU32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U32));
const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32));
const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
+
+// U64
+const auto CastU64toU8Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::U8));
+const auto CastU64toS8Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::S8));
+const auto CastU64toU16Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::U16));
+const auto CastU64toS16Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::S16));
+const auto CastU64toU32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::U32));
+const auto CastU64toS32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::S32));
+const auto CastU64toF16Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::F16));
+const auto CastU64toF32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::F32));
+
+// S64
+const auto CastS64toU8Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::U8));
+const auto CastS64toS8Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::S8));
+const auto CastS64toU16Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::U16));
+const auto CastS64toS16Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::S16));
+const auto CastS64toU32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::U32));
+const auto CastS64toS32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::S32));
+const auto CastS64toF16Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::F16));
+const auto CastS64toF32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::F32));
} // namespace
TEST_SUITE(CL)
@@ -149,6 +173,12 @@ using CLCastToF32Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T
} \
TEST_SUITE_END()
+// QASYMM8
+CAST_SUITE(QASYMM8_to_F32, DataType::QASYMM8, DataType::F32, CLCastToF32Fixture<uint8_t>, CastQASYMM8toF32Dataset, zero_tolerance)
+// QSYMM8
+CAST_SUITE(QSYMM8_to_F32, DataType::QSYMM8, DataType::F32, CLCastToF32Fixture<int8_t>, CastQSYMM8toF32Dataset, zero_tolerance)
+
+
// U8
CAST_SUITE(U8_to_S8, DataType::U8, DataType::S8, CLCastToS8Fixture<uint8_t>, CastU8toS8Dataset, zero_tolerance)
CAST_SUITE(U8_to_U16, DataType::U8, DataType::U16, CLCastToU16Fixture<uint8_t>, CastU8toU16Dataset, zero_tolerance)
@@ -221,6 +251,26 @@ CAST_SUITE(F32_to_U32, DataType::F32, DataType::U32, CLCastToU32Fixture<float>,
CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, CLCastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance)
CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, CLCastToF16Fixture<float>, CastF32toF16Dataset, zero_tolerance)
+// S64
+CAST_SUITE(S64_to_U8, DataType::S64, DataType::U8, CLCastToU8Fixture<int64_t>, CastS64toU8Dataset, one_tolerance)
+CAST_SUITE(S64_to_S8, DataType::S64, DataType::S8, CLCastToS8Fixture<int64_t>, CastS64toS8Dataset, one_tolerance)
+CAST_SUITE(S64_to_U16, DataType::S64, DataType::U16, CLCastToU16Fixture<int64_t>, CastS64toU16Dataset, one_tolerance)
+CAST_SUITE(S64_to_S16, DataType::S64, DataType::S16, CLCastToS16Fixture<int64_t>, CastS64toS16Dataset, one_tolerance)
+CAST_SUITE(S64_to_U32, DataType::S64, DataType::U32, CLCastToU32Fixture<int64_t>, CastS64toU32Dataset, one_tolerance)
+CAST_SUITE(S64_to_S32, DataType::S64, DataType::S32, CLCastToS32Fixture<int64_t>, CastS64toS32Dataset, one_tolerance)
+CAST_SUITE(S64_to_F16, DataType::S64, DataType::F16, CLCastToF16Fixture<int64_t>, CastS64toF16Dataset, zero_tolerance)
+CAST_SUITE(S64_to_F32, DataType::S64, DataType::F32, CLCastToF32Fixture<int64_t>, CastS64toF32Dataset, zero_tolerance)
+
+// U64
+CAST_SUITE(U64_to_U8, DataType::U64, DataType::U8, CLCastToU8Fixture<uint64_t>, CastU64toU8Dataset, one_tolerance)
+CAST_SUITE(U64_to_S8, DataType::U64, DataType::S8, CLCastToS8Fixture<uint64_t>, CastU64toS8Dataset, one_tolerance)
+CAST_SUITE(U64_to_U16, DataType::U64, DataType::U16, CLCastToU16Fixture<uint64_t>, CastU64toU16Dataset, one_tolerance)
+CAST_SUITE(U64_to_S16, DataType::U64, DataType::S16, CLCastToS16Fixture<uint64_t>, CastU64toS16Dataset, one_tolerance)
+CAST_SUITE(U64_to_U32, DataType::U64, DataType::U32, CLCastToU32Fixture<uint64_t>, CastU64toU32Dataset, one_tolerance)
+CAST_SUITE(U64_to_S32, DataType::U64, DataType::S32, CLCastToS32Fixture<uint64_t>, CastU64toS32Dataset, one_tolerance)
+CAST_SUITE(U64_to_F16, DataType::U64, DataType::F16, CLCastToF16Fixture<uint64_t>, CastU64toF16Dataset, zero_tolerance)
+CAST_SUITE(U64_to_F32, DataType::U64, DataType::F32, CLCastToF32Fixture<uint64_t>, CastU64toF32Dataset, zero_tolerance)
+
TEST_SUITE_END() // Cast
TEST_SUITE_END() // CL
} // namespace validation
diff --git a/tests/validation/CL/Col2Im.cpp b/tests/validation/CL/Col2Im.cpp
index b651bf8918..4b004e2472 100644
--- a/tests/validation/CL/Col2Im.cpp
+++ b/tests/validation/CL/Col2Im.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
-#include "src/core/CL/kernels/CLCol2ImKernel.h"
+#include "src/gpu/cl/kernels/ClCol2ImKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/framework/Asserts.h"
@@ -40,7 +40,7 @@ namespace validation
TEST_SUITE(CL)
TEST_SUITE(Col2Im)
-using CLCol2Im = CLSynthetizeFunction<CLCol2ImKernel>;
+using ClCol2Im = ClSynthetizeOperatorWithBorder<opencl::kernels::ClCol2ImKernel>;
/** Negative tests
*
@@ -59,7 +59,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
const auto input = TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::SIZET);
const auto output = TensorInfo(TensorShape(3U, 4U, 10U, 1U, 2U), 1, DataType::F32);
const auto conv_size = Size2D(3, 4);
- const auto status = CLCol2ImKernel::validate(&input, &output, conv_size);
+ const auto status = opencl::kernels::ClCol2ImKernel::validate(&input, &output, conv_size);
ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
}
@@ -68,7 +68,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
const auto input = TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::F32);
const auto output = TensorInfo(TensorShape(3U, 4U, 10U, 1U, 2U), 1, DataType::F32, DataLayout::NHWC);
const auto conv_size = Size2D(3, 4);
- const auto status = CLCol2ImKernel::validate(&input, &output, conv_size);
+ const auto status = opencl::kernels::ClCol2ImKernel::validate(&input, &output, conv_size);
ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
}
@@ -77,13 +77,13 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
const auto input = TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::F32);
const auto output = TensorInfo(TensorShape(3U, 4U, 10U, 2U, 2U), 1, DataType::F32);
const auto conv_size = Size2D(3, 4);
- const auto status = CLCol2ImKernel::validate(&input, &output, conv_size);
+ const auto status = opencl::kernels::ClCol2ImKernel::validate(&input, &output, conv_size);
ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
}
}
template <typename T>
-using CLCol2ImFixture = Col2ImValidationFixture<CLTensor, CLAccessor, CLCol2Im, T, true>;
+using ClCol2ImFixture = Col2ImOpValidationFixture<CLTensor, CLAccessor, ClCol2Im, T, true>;
/** Test kernel for single-precision floating point
*
@@ -99,7 +99,7 @@ using CLCol2ImFixture = Col2ImValidationFixture<CLTensor, CLAccessor, CLCol2Im,
* Kernel tested col2im
*/
FIXTURE_DATA_TEST_CASE(FP32,
- CLCol2ImFixture<float>,
+ ClCol2ImFixture<float>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(
framework::dataset::make("InputShape", { TensorShape(8U, 16U, 3U, 1U), TensorShape(17U, 16U, 3U, 1U), TensorShape(7U, 16U, 3U, 1U) }),
@@ -125,7 +125,7 @@ FIXTURE_DATA_TEST_CASE(FP32,
* Kernel tested col2im
*/
FIXTURE_DATA_TEST_CASE(F16,
- CLCol2ImFixture<half>,
+ ClCol2ImFixture<half>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(
framework::dataset::make("InputShape", TensorShape(17U, 16U, 3U, 1U)),
@@ -151,7 +151,7 @@ FIXTURE_DATA_TEST_CASE(F16,
* Kernel tested col2im
*/
FIXTURE_DATA_TEST_CASE(QASYMM8,
- CLCol2ImFixture<uint8_t>,
+ ClCol2ImFixture<uint8_t>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(
framework::dataset::make("InputShape", TensorShape(17U, 16U, 3U, 1U)),
diff --git a/tests/validation/CL/Comparisons.cpp b/tests/validation/CL/Comparisons.cpp
index d015528b0e..dd3dbd8d59 100644
--- a/tests/validation/CL/Comparisons.cpp
+++ b/tests/validation/CL/Comparisons.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -59,7 +59,7 @@ TEST_SUITE(Comparison)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid output type
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching input types
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
}),
@@ -75,7 +75,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
})),
- framework::dataset::make("Expected", { false, false, false, false, true})),
+ framework::dataset::make("Expected", { false, false, true, false, true})),
input1_info, input2_info, output_info, expected)
{
Status s = CLComparison::validate(&input1_info.clone()->set_is_resizable(false),
diff --git a/tests/validation/CL/Convolution3D.cpp b/tests/validation/CL/Convolution3D.cpp
new file mode 100644
index 0000000000..a2848560c3
--- /dev/null
+++ b/tests/validation/CL/Convolution3D.cpp
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2021, 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLConv3D.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DirectConvolution3DFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const RelativeTolerance<half> rel_tolerance_fp16(half(0.2)); /**< Relative tolerance for FP16 tests */
+constexpr float abs_tolerance_fp16(0.05f); /**< Absolute tolerance for FP16 tests */
+constexpr RelativeTolerance<float> rel_tolerance_fp32(0.05f); /**< Relative tolerance for FP32 tests */
+constexpr float abs_tolerance_fp32(0.0001f); /**< Absolute tolerance for FP32 tests*/
+constexpr AbsoluteTolerance<uint8_t> abs_tolerance_qasymm8(1); /**< Absolute tolerance for quantized tests */
+constexpr float tolerance_num = 0.07f; /**< Tolerance number */
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DirectConvolution3D)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(27U, 13U, 5U, 3U), // Unsupported data layout
+ TensorShape(27U, 13U, 5U, 3U), // Unsupported activation enabled
+ TensorShape(27U, 13U, 5U, 3U), // Mismatching data type
+ TensorShape(27U, 13U, 5U, 3U), // Unsupported data type
+ TensorShape(27U, 13U, 5U, 3U), // Mismatching input feature maps
+ TensorShape(27U, 13U, 5U, 3U), // Mismatching output feature maps
+ TensorShape(27U, 13U, 5U, 3U), // Mismatching bias shape
+ TensorShape(27U, 13U, 5U, 3U), // Unsupported number of weights dimensions
+ TensorShape(27U, 13U, 5U, 3U), // Unsupported number of biases dimensions
+ TensorShape(27U, 13U, 5U, 3U), // Mismatching output shape
+ TensorShape(27U, 13U, 5U, 3U)
+ }),
+ framework::dataset::make("WeightsShape", { TensorShape(4U, 27U, 3U, 3U, 3U),
+ TensorShape(4U, 27U, 3U, 3U, 3U),
+ TensorShape(4U, 27U, 3U, 3U, 3U),
+ TensorShape(4U, 27U, 3U, 3U, 3U),
+ TensorShape(4U, 32U, 3U, 3U, 3U),
+ TensorShape(8U, 27U, 3U, 3U, 3U),
+ TensorShape(4U, 27U, 3U, 3U, 3U),
+ TensorShape(4U, 27U, 3U, 3U, 3U, 2U),
+ TensorShape(4U, 27U, 3U, 3U, 3U),
+ TensorShape(4U, 27U, 3U, 3U, 3U),
+ TensorShape(4U, 27U, 3U, 3U, 3U)
+ })),
+ framework::dataset::make("BiasesShape", { TensorShape(4U),
+ TensorShape(4U),
+ TensorShape(4U),
+ TensorShape(4U),
+ TensorShape(4U),
+ TensorShape(4U),
+ TensorShape(8U),
+ TensorShape(4U),
+ TensorShape(4U),
+ TensorShape(4U),
+ TensorShape(4U)
+ })),
+ framework::dataset::make("OutputShape", { TensorShape(4U, 13U, 5U, 3U),
+ TensorShape(4U, 13U, 5U, 3U),
+ TensorShape(4U, 13U, 5U, 3U),
+ TensorShape(4U, 13U, 5U, 3U),
+ TensorShape(4U, 13U, 5U, 3U),
+ TensorShape(4U, 13U, 5U, 3U),
+ TensorShape(4U, 13U, 5U, 3U),
+ TensorShape(4U, 13U, 5U, 3U),
+ TensorShape(4U, 13U, 5U, 3U, 2U),
+ TensorShape(4U, 11U, 5U, 3U),
+ TensorShape(4U, 13U, 5U, 3U)
+ })),
+ framework::dataset::make("Conv3dInfo", { Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+ Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+ Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+ Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+ Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+ Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+ Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+ Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+ Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+ Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+ Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false)
+ })),
+ framework::dataset::make("SrcDataType", { DataType::F32,
+ DataType::F32,
+ DataType::F32,
+ DataType::U32,
+ DataType::F32,
+ DataType::F32,
+ DataType::F32,
+ DataType::F32,
+ DataType::F32,
+ DataType::F32,
+ DataType::F32
+ })),
+ framework::dataset::make("WeightsDataType", { DataType::F32,
+ DataType::F32,
+ DataType::F16,
+ DataType::U32,
+ DataType::F32,
+ DataType::F32,
+ DataType::F32,
+ DataType::F32,
+ DataType::F32,
+ DataType::F32,
+ DataType::F32
+ })),
+ framework::dataset::make("DataLayout", { DataLayout::NCDHW,
+ DataLayout::NDHWC,
+ DataLayout::NDHWC,
+ DataLayout::NDHWC,
+ DataLayout::NDHWC,
+ DataLayout::NDHWC,
+ DataLayout::NDHWC,
+ DataLayout::NDHWC,
+ DataLayout::NDHWC,
+ DataLayout::NDHWC,
+ DataLayout::NDHWC
+ })),
+ framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, false, true })),
+ input_shape, weights_shape, biases_shape, output_shape, conv3d_info, src_data_type, weights_data_type, data_layout, expected)
+{
+ TensorInfo input_info = TensorInfo(input_shape, 1, src_data_type);
+ TensorInfo weights_info = TensorInfo(weights_shape, 1, weights_data_type);
+ TensorInfo biases_info = TensorInfo(biases_shape, 1, src_data_type);
+ TensorInfo output_info = TensorInfo(output_shape, 1, src_data_type);
+
+ input_info.set_data_layout(data_layout);
+ weights_info.set_data_layout(data_layout);
+ biases_info.set_data_layout(data_layout);
+ output_info.set_data_layout(data_layout);
+
+ bool is_valid = bool(CLConv3D::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv3d_info));
+ ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+
+template <typename T>
+using CLDirectConvolution3DFixture = DirectConvolution3DValidationFixture<CLTensor, CLAccessor, CLConv3D, T>;
+template <typename T>
+using CLDirectConvolution3DQuantizedFixture = DirectConvolution3DValidationQuantizedFixture<CLTensor, CLAccessor, CLConv3D, T>;
+
+TEST_SUITE(NDHWC)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DFixture<half>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U),
+ TensorShape(15U, 7U, 11U, 7U),
+ TensorShape(19U, 5U, 16U, 4U),
+ TensorShape(13U, 5U, 17U, 2U)
+ }),
+ framework::dataset::make("StrideX", { 1, 3, 2, 1 })),
+ framework::dataset::make("StrideY", { 2, 1, 3, 1 })),
+ framework::dataset::make("StrideZ", { 3, 2, 1, 1 })),
+ framework::dataset::make("PadX", { 0, 2, 1, 0 })),
+ framework::dataset::make("PadY", { 1, 0, 2, 0 })),
+ framework::dataset::make("PadZ", { 2, 1, 0, 0 })),
+ framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })),
+ framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })),
+ framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })),
+ framework::dataset::make("NumKernels", { 5, 3, 1, 11 })),
+ framework::dataset::make("HasBias", { true, true, true, false })),
+ framework::dataset::make("Activation", ActivationLayerInfo())),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", DataLayout::NDHWC)))
+{
+ validate(CLAccessor(_target), _reference, rel_tolerance_fp16, tolerance_num, abs_tolerance_fp16);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U),
+ TensorShape(15U, 7U, 11U, 7U),
+ TensorShape(19U, 5U, 16U, 4U),
+ TensorShape(13U, 5U, 17U, 2U)
+ }),
+ framework::dataset::make("StrideX", { 1, 3, 2, 1 })),
+ framework::dataset::make("StrideY", { 2, 1, 3, 1 })),
+ framework::dataset::make("StrideZ", { 3, 2, 1, 1 })),
+ framework::dataset::make("PadX", { 0, 2, 1, 0 })),
+ framework::dataset::make("PadY", { 1, 0, 2, 0 })),
+ framework::dataset::make("PadZ", { 2, 1, 0, 0 })),
+ framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })),
+ framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })),
+ framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })),
+ framework::dataset::make("NumKernels", { 5, 3, 1, 11 })),
+ framework::dataset::make("HasBias", { true, true, true, false })),
+ framework::dataset::make("Activation", ActivationLayerInfo())),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NDHWC)))
+{
+ validate(CLAccessor(_target), _reference, rel_tolerance_fp32, 0.0, abs_tolerance_fp32);
+}
+
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U),
+ TensorShape(15U, 7U, 11U, 7U),
+ TensorShape(19U, 5U, 16U, 4U),
+ TensorShape(13U, 5U, 17U, 2U)
+ }),
+ framework::dataset::make("StrideX", { 1, 3, 2, 1 })),
+ framework::dataset::make("StrideY", { 2, 1, 3, 1 })),
+ framework::dataset::make("StrideZ", { 3, 2, 1, 1 })),
+ framework::dataset::make("PadX", { 0, 2, 1, 0 })),
+ framework::dataset::make("PadY", { 1, 0, 2, 0 })),
+ framework::dataset::make("PadZ", { 2, 1, 0, 0 })),
+ framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })),
+ framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })),
+ framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })),
+ framework::dataset::make("NumKernels", { 5, 3, 1, 11 })),
+ framework::dataset::make("HasBias", { true, true, true, false })),
+ framework::dataset::make("Activation", ActivationLayerInfo())),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("DataLayout", DataLayout::NDHWC)),
+ framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))),
+ framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))),
+ framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5))))
+{
+ validate(CLAccessor(_target), _reference, abs_tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U),
+ TensorShape(15U, 7U, 11U, 7U),
+ TensorShape(19U, 5U, 16U, 4U),
+ TensorShape(13U, 5U, 17U, 2U)
+ }),
+ framework::dataset::make("StrideX", { 1, 3, 2, 1 })),
+ framework::dataset::make("StrideY", { 2, 1, 3, 1 })),
+ framework::dataset::make("StrideZ", { 3, 2, 1, 1 })),
+ framework::dataset::make("PadX", { 0, 2, 1, 0 })),
+ framework::dataset::make("PadY", { 1, 0, 2, 0 })),
+ framework::dataset::make("PadZ", { 2, 1, 0, 0 })),
+ framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })),
+ framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })),
+ framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })),
+ framework::dataset::make("NumKernels", { 5, 3, 1, 11 })),
+ framework::dataset::make("HasBias", { true, true, true, false })),
+ framework::dataset::make("Activation", ActivationLayerInfo())),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataLayout", DataLayout::NDHWC)),
+ framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))),
+ framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))),
+ framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5))))
+{
+ validate(CLAccessor(_target), _reference, abs_tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE_END() // NDHWC
+TEST_SUITE_END() // DirectConvolution3D
+TEST_SUITE_END() // CL
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index 31eed7646c..8820a6a31e 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
@@ -37,12 +38,16 @@
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/ConvolutionLayerFixture.h"
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp
+ * Please check there for any differences in coverage.
+ */
namespace arm_compute
{
namespace test
{
namespace validation
{
+using framework::dataset::make;
namespace
{
class SmallConvolutionLayerDatasetCases final : public datasets::ConvolutionLayerDataset
@@ -61,32 +66,32 @@ constexpr AbsoluteTolerance<float> tolerance_qasymm8(1); /**< T
constexpr float tolerance_num = 0.07f; /**< Tolerance number */
/** CNN data types */
-const auto CNNDataTypes = framework::dataset::make("DataType",
+const auto CNNDataTypes = make("DataType",
{
DataType::F16,
- DataType::F32,
- DataType::QASYMM8,
- DataType::QASYMM8_SIGNED,
+ DataType::F32,
+ DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
});
/** Grouped CNN data types */
-const auto GroupedCNNDataTypes = framework::dataset::make("DataType",
+const auto GroupedCNNDataTypes = make("DataType",
{
DataType::F16,
- DataType::F32
+ DataType::F32
});
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+const auto ActivationFunctionsDataset = make("ActivationInfo",
{
ActivationLayerInfo(),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
});
-const auto ActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo",
+const auto ActivationFunctionsSmallDataset = make("ActivationInfo",
{
ActivationLayerInfo(),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
});
} // namespace
@@ -96,7 +101,7 @@ TEST_SUITE(ConvolutionLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Select GEMM
+ make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Select GEMM
TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Select GEMM
TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Select GEMM
TensorInfo(TensorShape(23U, 27U, 31U, 4U), 1, DataType::F32), // Select WINOGRAD
@@ -106,7 +111,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Select GEMM
TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::QASYMM8_SIGNED), // Select GEMM
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32),
+ make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32),
TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 31U, 21U), 1, DataType::F32),
@@ -116,7 +121,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32),
TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::QASYMM8_SIGNED),
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
+ make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
@@ -126,7 +131,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F32),
TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::QASYMM8_SIGNED),
})),
- framework::dataset::make("ConvInfo", { PadStrideInfo(1, 2, 1, 1),
+ make("ConvInfo", { PadStrideInfo(1, 2, 1, 1),
PadStrideInfo(1, 2, 1, 1),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
@@ -136,7 +141,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
PadStrideInfo(1, 1, 2, 2),
PadStrideInfo(1, 1, 2, 2),
})),
- framework::dataset::make("GpuTarget", { GPUTarget::BIFROST,
+ make("GpuTarget", { GPUTarget::BIFROST,
GPUTarget::MIDGARD,
GPUTarget::G71,
GPUTarget::G71,
@@ -146,7 +151,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
GPUTarget::BIFROST,
GPUTarget::BIFROST,
})),
- framework::dataset::make("Dilation", { Size2D(1U, 1U),
+ make("Dilation", { Size2D(1U, 1U),
Size2D(1U, 1U),
Size2D(1U, 1U),
Size2D(1U, 1U),
@@ -156,8 +161,8 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
Size2D(2U, 1U),
Size2D(2U, 1U),
})),
- framework::dataset::make("EnableFastMath", { false, false, false, false, false, false, true, true, true })),
- framework::dataset::make("Expected",{ ConvolutionMethod::GEMM,
+ make("EnableFastMath", { false, false, false, false, false, false, true, true, true })),
+ make("Expected",{ ConvolutionMethod::GEMM,
ConvolutionMethod::GEMM,
ConvolutionMethod::GEMM,
ConvolutionMethod::WINOGRAD,
@@ -188,15 +193,14 @@ template <typename T>
using CLGEMMConvolutionLayerFixture = ConvolutionValidationFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
template <typename T>
using CLGEMMConvolutionLayerMixedDataLayoutFixture = ConvolutionValidationFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T, true>;
+template <typename T>
+using CLConvolutionValidationWithPaddingFixture = ConvolutionValidationWithPaddingFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
TEST_SUITE(Float)
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType",
- DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("ReshapeWeights", { true })), make("DataType", DataType::F16)), make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsSmallDataset))
{
// Validate output
@@ -207,31 +211,40 @@ TEST_SUITE_END() // FP16
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType",
- DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("ReshapeWeights", { true })), make("DataType", DataType::F32)), make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsSmallDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
- framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
- framework::dataset::make("Bias", TensorShape(2U))),
- framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
- framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
- framework::dataset::make("Dilation", Size2D(1, 1))),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType",DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsSmallDataset))
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ make("Input", TensorShape(23U, 27U, 5U)),
+ make("Weights", TensorShape(3U, 3U, 5U, 2U))),
+ make("Bias", TensorShape(2U))),
+ make("Output", TensorShape(11U, 25U, 2U))),
+ make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
+ make("Dilation", Size2D(1, 1))),
+ make("ReshapeWeights", { true })),
+ make("DataType", DataType::F32)),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
+FIXTURE_DATA_TEST_CASE(RunSmallWithPadding, CLConvolutionValidationWithPaddingFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerPrePaddingDataset(),
+ make("ReshapeWeights", { true })),
+ make("DataType", DataType::F32)),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })),
+make("PrePadLayer", { PaddingList({ { 1, 1 }, { 1, 1 } }) })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
@@ -242,64 +255,108 @@ using CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture = ConvolutionValidat
template <typename T>
using CLGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T, int8_t>;
-const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
-{
- ActivationLayerInfo(),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
-});
-const auto QuantizedActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo",
-{
- ActivationLayerInfo(),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
-});
-
TEST_SUITE(Quantized)
-const auto QuantizationData = framework::dataset::make("QuantizationInfo",
+const auto QuantizationData = make("QuantizationInfo",
{
QuantizationInfo(0.5f, 10),
QuantizationInfo(0.3f, 3),
QuantizationInfo(1.1f, 10),
});
+
+/// @note: Every asymmetric quantized test has two versions, with and without activation, because the quantization
+/// info given here is ignored when there is no activation. In that case, instead of using the same quantization
+/// information for all the tensors, the fixture generates separate quantization info for each input tensor and for
+/// the output tensor. Once dynamic quantization is also supported in the presence of an activation, these two
+/// versions should be merged again and the explicitly specified quantization info removed.
+const auto NoActivation = make("ActivationInfo", ActivationLayerInfo());
+
+const auto IgnoredQuantizationInfo = make("IgnoredQuantizationInfo", QuantizationInfo());
+
+const auto QuantizedActivationFunctionsSmallDataset = make("ActivationInfo",
+{
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
+});
+
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmallCases, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(SmallConvolutionLayerDatasetCases(),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- QuantizationData),
- QuantizedActivationFunctionsSmallDataset))
+ combine(SmallConvolutionLayerDatasetCases(),
+ make("ReshapeWeights", { true }),
+ make("DataType", DataType::QASYMM8),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ IgnoredQuantizationInfo,
+ NoActivation))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallCasesWithActivation, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
+ combine(SmallConvolutionLayerDatasetCases(),
+ make("ReshapeWeights", { true }),
+ make("DataType", DataType::QASYMM8),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ QuantizationData,
+ QuantizedActivationFunctionsSmallDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- QuantizationData),
- QuantizedActivationFunctionsSmallDataset))
+ combine(datasets::SmallConvolutionLayerDataset(),
+ make("ReshapeWeights", { true }),
+ make("DataType", DataType::QASYMM8),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ IgnoredQuantizationInfo,
+ NoActivation))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
+ combine(datasets::SmallConvolutionLayerDataset(),
+ make("ReshapeWeights", { true }),
+ make("DataType", DataType::QASYMM8),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ QuantizationData,
+ QuantizedActivationFunctionsSmallDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
- framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
- framework::dataset::make("Bias", TensorShape(2U))),
- framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
- framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
- framework::dataset::make("Dilation", Size2D(1, 1))),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- QuantizationData),
- QuantizedActivationFunctionsSmallDataset))
+ combine(
+ make("Input", TensorShape(23U, 27U, 5U)),
+ make("Weights", TensorShape(3U, 3U, 5U, 2U)),
+ make("Bias", TensorShape(2U)),
+ make("Output", TensorShape(11U, 25U, 2U)),
+ make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)),
+ make("Dilation", Size2D(1, 1)),
+ make("ReshapeWeights", { true }),
+ make("DataType", DataType::QASYMM8),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ IgnoredQuantizationInfo,
+ NoActivation))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::ALL,
+ combine(
+ make("Input", TensorShape(23U, 27U, 5U)),
+ make("Weights", TensorShape(3U, 3U, 5U, 2U)),
+ make("Bias", TensorShape(2U)),
+ make("Output", TensorShape(11U, 25U, 2U)),
+ make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)),
+ make("Dilation", Size2D(1, 1)),
+ make("ReshapeWeights", { true }),
+ make("DataType", DataType::QASYMM8),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ QuantizationData,
+ QuantizedActivationFunctionsSmallDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -307,44 +364,78 @@ FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerQuantizedMixedD
TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- QuantizationData),
- QuantizedActivationFunctionsSmallDataset))
+ combine(datasets::SmallConvolutionLayerDataset(),
+ make("ReshapeWeights", { true }),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ IgnoredQuantizationInfo,
+ NoActivation))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
+ combine(datasets::SmallConvolutionLayerDataset(),
+ make("ReshapeWeights", { true }),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ QuantizationData,
+ QuantizedActivationFunctionsSmallDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
- framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
- framework::dataset::make("Bias", TensorShape(2U))),
- framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
- framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
- framework::dataset::make("Dilation", Size2D(1, 1))),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- QuantizationData),
- QuantizedActivationFunctionsSmallDataset))
+ combine(
+ make("Input", TensorShape(23U, 27U, 5U)),
+ make("Weights", TensorShape(3U, 3U, 5U, 2U)),
+ make("Bias", TensorShape(2U)),
+ make("Output", TensorShape(11U, 25U, 2U)),
+ make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)),
+ make("Dilation", Size2D(1, 1)),
+ make("ReshapeWeights", { true }),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ IgnoredQuantizationInfo,
+ NoActivation))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL,
+ combine(
+ make("Input", TensorShape(23U, 27U, 5U)),
+ make("Weights", TensorShape(3U, 3U, 5U, 2U)),
+ make("Bias", TensorShape(2U)),
+ make("Output", TensorShape(11U, 25U, 2U)),
+ make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)),
+ make("Dilation", Size2D(1, 1)),
+ make("ReshapeWeights", { true }),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ QuantizationData,
+ QuantizedActivationFunctionsSmallDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE(QSYMM8_PER_CHANNEL)
+const auto QuantizedActivationFunctionsSmallPerChannelDataset = make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
+});
+
FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLGEMMConvolutionLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("ReshapeWeights", { true })),
+ make("DataType", { DataType::QASYMM8_SIGNED })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
QuantizationData),
- QuantizedActivationFunctionsSmallDataset),
- framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+ QuantizedActivationFunctionsSmallPerChannelDataset),
+ make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -352,12 +443,12 @@ FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLGEMMConvolutionLayerQuantizedPerChannel
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", { DataType::QASYMM8 })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("ReshapeWeights", { true })),
+ make("DataType", { DataType::QASYMM8 })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
QuantizationData),
- QuantizedActivationFunctionsSmallDataset),
- framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+ QuantizedActivationFunctionsSmallPerChannelDataset),
+ make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -376,9 +467,7 @@ TEST_SUITE(Float)
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallGroupedConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+ make("ReshapeWeights", { true })), make("DataType", DataType::F32)), make("DataLayout", { DataLayout::NCHW })),
ActivationFunctionsSmallDataset))
{
// Validate output
@@ -387,9 +476,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<float>, fr
FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMGroupedConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeGroupedConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+ make("ReshapeWeights", { true })),
+ make("DataType", DataType::F32)),
+ make("DataLayout", { DataLayout::NCHW })),
ActivationFunctionsDataset))
{
// Validate output
@@ -400,9 +489,7 @@ TEST_SUITE_END() // FP32
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallGroupedConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+ make("ReshapeWeights", { true })), make("DataType", DataType::F16)), make("DataLayout", { DataLayout::NCHW })),
ActivationFunctionsSmallDataset))
{
// Validate output
@@ -411,9 +498,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<half>, fra
FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMGroupedConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeGroupedConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+ make("ReshapeWeights", { true })),
+ make("DataType", DataType::F16)),
+ make("DataLayout", { DataLayout::NCHW })),
ActivationFunctionsDataset))
{
// Validate output
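
The ConvolutionLayer.cpp changes above replace the fully qualified framework::dataset::make calls and the nested binary combine(combine(...)) chains with unqualified make (presumably via a using-declaration for framework::dataset::make, as added for the other files in this patch) and a variadic combine, so each test lists its parameter axes once. Below is a minimal, self-contained sketch of that cartesian-product idea; NamedValues and the local make/combine here are illustrative stand-ins, not the Compute Library test framework.

// Sketch: a named value list plus a variadic cartesian-product combine,
// mirroring the flattened combine(dataset, make(...), make(...), ...) style.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// A named list of printable values, e.g. make("DataLayout", {"NCHW", "NHWC"}).
struct NamedValues
{
    std::string              name;
    std::vector<std::string> values;
};

NamedValues make(std::string name, std::vector<std::string> values)
{
    return { std::move(name), std::move(values) };
}

// One row of the resulting grid: a (name, value) pair per dataset.
using Row = std::vector<std::pair<std::string, std::string>>;

std::vector<Row> combine(const NamedValues &ds)
{
    std::vector<Row> rows;
    for(const auto &v : ds.values)
    {
        rows.push_back({ { ds.name, v } });
    }
    return rows;
}

template <typename... Rest>
std::vector<Row> combine(const NamedValues &ds, const Rest &... rest)
{
    const std::vector<Row> tail = combine(rest...); // grid of the remaining axes
    std::vector<Row>       rows;
    for(const auto &v : ds.values)
    {
        for(const auto &row : tail)
        {
            Row r{ { ds.name, v } };
            r.insert(r.end(), row.begin(), row.end());
            rows.push_back(r);
        }
    }
    return rows;
}

int main()
{
    // Flattened style, analogous to the new tests:
    const auto grid = combine(make("ReshapeWeights", { "true" }),
                              make("DataType", { "QASYMM8" }),
                              make("DataLayout", { "NCHW", "NHWC" }));
    for(const auto &row : grid)
    {
        for(const auto &kv : row)
        {
            std::cout << kv.first << "=" << kv.second << " ";
        }
        std::cout << "\n";
    }
    return 0; // prints 1 * 1 * 2 = 2 parameter combinations
}

Flattening the chains keeps every parameter axis of a test visible at a glance and makes the grid easier to extend than the nested combine(combine(...)) form.
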
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
index 15962b588d..d1508fd902 100644
--- a/tests/validation/CL/DeconvolutionLayer.cpp
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,27 +53,29 @@ const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorSh
*framework::dataset::make("PadLeft", 3)
*framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
-const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
- 2)
- *framework::dataset::make("PadLeft", 3)
- *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
-
-const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
- * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 });
-
-const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2)
+const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4, 2) * framework::dataset::make("StrideY", 2, 4) * framework::dataset::make("PadX", 1, 3)
* framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 });
+const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 2, 4) * framework::dataset::make("StrideY", 1, 4, 2) * framework::dataset::make("PadX", 1, 2)
+ * framework::dataset::make("PadY", 1, 3) * framework::dataset::make("NumKernels", { 3 });
+
const auto data3x3_asymm = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadLeft", 0, 1)
* framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", { 3 });
const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2)
* framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 });
+const auto data3x3_precommit_large_channels = datasets::SmallDeconvolutionShapesWithLargerChannels() * framework::dataset::make("StrideX", 2) * framework::dataset::make("StrideY", 2)
+ * framework::dataset::make("PadX", 1)
+ * framework::dataset::make("PadY", 2) * framework::dataset::make("NumKernels", { 5 });
+
const auto data2x2_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 2) * framework::dataset::make("StrideY", 2) * framework::dataset::make("PadX", 1)
* framework::dataset::make("PadY", 1) * framework::dataset::make("NumKernels", { 3 });
-const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1)
+const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4, 2) * framework::dataset::make("StrideY", 2, 4) * framework::dataset::make("PadX", 0, 1)
+ * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 });
+
+const auto data5x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 2, 4) * framework::dataset::make("StrideY", 1, 4, 2) * framework::dataset::make("PadX", 0, 1)
* framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 });
const auto data_layouts_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC });
@@ -90,9 +92,15 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights shape
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), // Non supported data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid bias shape
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid bias shape
TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32), // Window shrink
TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U, 13U, 27U), 1, DataType::F32, DataLayout::NHWC), // Mismatching data type
+ TensorInfo(TensorShape(2U, 13U, 27U), 1, DataType::F32, DataLayout::NHWC), // Invalid weights shape
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16, DataLayout::NHWC), // Non supported data type
+ TensorInfo(TensorShape(2U, 13U, 27U), 1, DataType::F32, DataLayout::NHWC), // Invalid bias shape
+ TensorInfo(TensorShape(4U, 11U, 13U, 3U), 1, DataType::F32, DataLayout::NHWC), // Window shrink
+ TensorInfo(TensorShape(2U, 16U, 32U), 1, DataType::F32, DataLayout::NHWC),
}),
framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16),
TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
@@ -100,6 +108,12 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 2U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 2U, 2U, 4U), 1, DataType::F32, DataLayout::NHWC),
})),
framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(1U), 1, DataType::F16),
TensorInfo(TensorShape(1U), 1, DataType::F32),
@@ -107,6 +121,12 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
TensorInfo(TensorShape(25U, 11U), 1, DataType::F32),
TensorInfo(TensorShape(1U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(1U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(25U, 11U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
})),
framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16),
TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32),
@@ -114,6 +134,12 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U, 11U, 25U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 10U, 25U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 13U, 13U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U, 9U, 11U, 3U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U, 43U, 91U), 1, DataType::F32, DataLayout::NHWC),
})),
framework::dataset::make("PadStrideInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
@@ -121,8 +147,15 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 1, 1),
PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 1, 1),
+ PadStrideInfo(3, 3, 2, 2),
})),
- framework::dataset::make("Expected", { false, false, false, false, false, true })),
+ framework::dataset::make("Expected", { false, false, false, false, false, true, // NCHW
+ false, false, false, false, false, true })), // NHWC
input_info, weights_info, bias_info, output_info, pad_info, expected)
{
bool is_valid = bool(CLDeconvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pad_info));
@@ -149,6 +182,9 @@ using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<CLTensor,
template <typename T>
using CLDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 9, 9>;
+template <typename T>
+using CLDeconvolutionLayerFixture5x1 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 5, 1>;
+
TEST_SUITE(Float)
TEST_SUITE(FP32)
@@ -171,6 +207,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerFixture3x3<float>, framewor
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
+
+FIXTURE_DATA_TEST_CASE(RunSmallWithLargeChannels, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data3x3_precommit_large_channels,
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ data_layouts_dataset),
+ framework::dataset::make("AddBias", { true })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+
FIXTURE_DATA_TEST_CASE(RunAsymm, CLDeconvolutionLayerAsymmFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3_asymm, framework::dataset::make("DataType",
DataType::F32)),
data_layouts_dataset),
@@ -180,8 +227,8 @@ FIXTURE_DATA_TEST_CASE(RunAsymm, CLDeconvolutionLayerAsymmFixture3x3<float>, fra
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::F32)),
- data_layouts_dataset),
- add_bias_dataset))
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("AddBias", { true })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
@@ -202,7 +249,7 @@ TEST_SUITE_END() // W2x2
TEST_SUITE(W1x1)
FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data1x1, framework::dataset::make("DataType", DataType::F32)),
data_layouts_dataset),
- add_bias_dataset))
+ framework::dataset::make("AddBias", { true })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
@@ -218,6 +265,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerAsymmFixture9x9<float>, fra
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
TEST_SUITE_END() // W9x9
+
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture5x1<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F32)),
+ data_layouts_dataset),
+ framework::dataset::make("AddBias", { true })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END() // W5x1
+
TEST_SUITE_END() // FP32
TEST_SUITE(FP16)
@@ -225,7 +283,7 @@ TEST_SUITE(FP16)
TEST_SUITE(W4x4)
FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data4x4, framework::dataset::make("DataType", DataType::F16)),
data_layouts_dataset),
- add_bias_dataset))
+ framework::dataset::make("AddBias", { true })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
@@ -242,8 +300,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerFixture3x3<half>, framework
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::F16)),
- data_layouts_dataset),
- add_bias_dataset))
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("AddBias", { true })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
@@ -264,13 +322,23 @@ TEST_SUITE_END() // W2x2
TEST_SUITE(W1x1)
FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data1x1, framework::dataset::make("DataType", DataType::F16)),
data_layouts_dataset),
- add_bias_dataset))
+ framework::dataset::make("AddBias", { true })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
}
TEST_SUITE_END() // W1x1
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture5x1<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F16)),
+ data_layouts_dataset),
+ framework::dataset::make("AddBias", { true })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END() // W5x1
+
TEST_SUITE_END() // FP16
TEST_SUITE_END() // Float
@@ -287,6 +355,9 @@ template <typename T>
using CLDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>;
template <typename T>
+using CLDeconvolutionLayerQuantizedFixture5x1 = DeconvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 5, 1>;
+
+template <typename T>
using CLDeconvolutionLayerQuantizedPerChannelFixture4x4 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 4, 4>;
template <typename T>
@@ -298,6 +369,9 @@ using CLDeconvolutionLayerQuantizedPerChannelFixture2x2 = DeconvolutionValidatio
template <typename T>
using CLDeconvolutionLayerQuantizedPerChannelFixture1x1 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 1, 1>;
+template <typename T>
+using CLDeconvolutionLayerQuantizedPerChannelFixture5x1 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 5, 1>;
+
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
@@ -307,7 +381,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture4x4<uint8_t>, fr
data_layouts_dataset),
framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })),
framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })),
- add_bias_dataset))
+ framework::dataset::make("AddBias", { true })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
@@ -327,12 +401,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedFixture3x3<uint8_t
validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3,
- framework::dataset::make("DataType",
- DataType::QASYMM8)),
- data_layouts_dataset),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 128) })),
framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 128), QuantizationInfo(4.f / 255.f, 128) })),
- add_bias_dataset))
+ framework::dataset::make("AddBias", { true })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
@@ -358,13 +431,26 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<uint8_t>, fr
data_layouts_dataset),
framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f / 255.f, 0) })),
framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(4.f / 255.f, 0) })),
- add_bias_dataset))
+ framework::dataset::make("AddBias", { true })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
}
TEST_SUITE_END() // W1x1
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture5x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType",
+ DataType::QASYMM8)),
+ data_layouts_dataset),
+ framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })),
+ framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })),
+ framework::dataset::make("AddBias", { true })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W5x1
+
TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
@@ -378,7 +464,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture4x4<int8_t>, fra
data_layouts_dataset),
framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })),
framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })),
- add_bias_dataset))
+ framework::dataset::make("AddBias", { true })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
@@ -400,12 +486,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedFixture3x3<int8_t>
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerQuantizedFixture3x3<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3,
- framework::dataset::make("DataType",
- DataType::QASYMM8_SIGNED)),
- data_layouts_dataset),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, -10), QuantizationInfo(2.f / 255.f, 127) })),
framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 64), QuantizationInfo(4.f / 255.f, -128) })),
- add_bias_dataset))
+ framework::dataset::make("AddBias", { true })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
@@ -431,13 +516,26 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<int8_t>, fra
data_layouts_dataset),
framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f / 255.f, 0) })),
framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(4.f / 255.f, 0) })),
- add_bias_dataset))
+ framework::dataset::make("AddBias", { true })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
}
TEST_SUITE_END() // W1x1
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture5x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED)),
+ data_layouts_dataset),
+ framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })),
+ framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })),
+ framework::dataset::make("AddBias", { true })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W5x1
+
TEST_SUITE_END() // QASYMM8_SIGNED
const auto input_qinfo_dataset = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10) });
@@ -448,23 +546,23 @@ const auto output_signed_qinfo_dataset = framework::dataset::make("OutputQuantiz
TEST_SUITE(QSYMM8_PER_CHANNEL)
TEST_SUITE(W4x4)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture4x4<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data4x4,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture4x4<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data4x4,
framework::dataset::make("DataType", DataType::QASYMM8)),
data_layouts_dataset),
input_qinfo_dataset),
output_qinfo_dataset),
- add_bias_dataset),
+ framework::dataset::make("AddBias", { true })),
framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
}
-FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture4x4<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data4x4,
+FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture4x4<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data4x4,
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
data_layouts_dataset),
input_signed_qinfo_dataset),
output_signed_qinfo_dataset),
- add_bias_dataset),
+ framework::dataset::make("AddBias", { true })),
framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
@@ -473,32 +571,57 @@ FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFi
TEST_SUITE_END() // W4x4
TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture3x3<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data3x3,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data3x3,
framework::dataset::make("DataType", DataType::QASYMM8)),
data_layouts_dataset),
input_qinfo_dataset),
output_qinfo_dataset),
- add_bias_dataset),
+ framework::dataset::make("AddBias", { true })),
framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
}
-FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture3x3<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data3x3,
+FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture3x3<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data3x3,
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
data_layouts_dataset),
input_signed_qinfo_dataset),
output_signed_qinfo_dataset),
- add_bias_dataset),
+ framework::dataset::make("AddBias", { true })),
framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
}
+
+FIXTURE_DATA_TEST_CASE(RunSmallSignedPrecommit, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(data3x3_precommit,
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ data_layouts_dataset),
+ input_signed_qinfo_dataset),
+ output_signed_qinfo_dataset),
+ add_bias_dataset),
+ framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
TEST_SUITE_END() // W3x3
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(data3x3_precommit,
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ data_layouts_dataset),
+ input_qinfo_dataset),
+ output_qinfo_dataset),
+ add_bias_dataset),
+ framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+
TEST_SUITE(W2x2)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data2x2_precommit,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(data2x2_precommit,
framework::dataset::make("DataType", DataType::QASYMM8)),
data_layouts_dataset),
input_qinfo_dataset),
@@ -509,7 +632,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture2
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
}
-FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data2x2_precommit,
+FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(data2x2_precommit,
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
data_layouts_dataset),
input_signed_qinfo_dataset),
@@ -523,23 +646,23 @@ FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFi
TEST_SUITE_END() // W2x2
TEST_SUITE(W1x1)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture1x1<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data1x1,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture1x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data1x1,
framework::dataset::make("DataType", DataType::QASYMM8)),
data_layouts_dataset),
input_qinfo_dataset),
output_qinfo_dataset),
- add_bias_dataset),
+ framework::dataset::make("AddBias", { false })),
framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
}
-FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture1x1<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data1x1,
+FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture1x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data1x1,
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
data_layouts_dataset),
input_signed_qinfo_dataset),
output_signed_qinfo_dataset),
- add_bias_dataset),
+ framework::dataset::make("AddBias", { true })),
framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
@@ -547,6 +670,31 @@ FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFi
}
TEST_SUITE_END() // W1x1
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture5x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data5x1,
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ data_layouts_dataset),
+ input_qinfo_dataset),
+ output_qinfo_dataset),
+ framework::dataset::make("AddBias", { true })),
+ framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture5x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data5x1,
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ data_layouts_dataset),
+ input_signed_qinfo_dataset),
+ output_signed_qinfo_dataset),
+ framework::dataset::make("AddBias", { false })),
+ framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W5x1
+
TEST_SUITE_END() // QSYMM8_PER_CHANNEL
TEST_SUITE_END() // Quantized
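
Several of the reworked deconvolution datasets above switch from a two-argument range to a three-argument form with a step, e.g. framework::dataset::make("StrideX", 1, 4, 2), which trims how many stride/pad combinations each nightly run covers. The sketch below shows the enumeration this implies; range_values is an illustrative stand-in under the assumption of a half-open [start, end) range with an optional step, not the framework's own range dataset.

// Sketch: enumerate the values a (start, end[, step]) range form would produce.
#include <iostream>
#include <vector>

std::vector<int> range_values(int start, int end, int step = 1)
{
    std::vector<int> values;
    for(int v = start; v < end; v += step)
    {
        values.push_back(v);
    }
    return values;
}

int main()
{
    // make("StrideX", 1, 4, 2) -> strides 1 and 3 under this assumption
    for(int v : range_values(1, 4, 2))
    {
        std::cout << "StrideX=" << v << "\n";
    }
    // make("PadX", 0, 2) -> paddings 0 and 1
    for(int v : range_values(0, 2))
    {
        std::cout << "PadX=" << v << "\n";
    }
    return 0;
}
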
diff --git a/tests/validation/CL/DepthConvertLayer.cpp b/tests/validation/CL/DepthConvertLayer.cpp
index 8f14337b27..490b38ccf6 100644
--- a/tests/validation/CL/DepthConvertLayer.cpp
+++ b/tests/validation/CL/DepthConvertLayer.cpp
@@ -62,7 +62,7 @@ TEST_SUITE(DepthConvertLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Invalid data type combination
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Support upcasting from QASYMM8 to S16
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Mismatching shapes
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Invalid shift
@@ -84,7 +84,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
ConvertPolicy::WRAP,
})),
framework::dataset::make("Shift",{ 0, 0, 0, 1, 1, 0, })),
- framework::dataset::make("Expected", { false, false, false, false, false, true})),
+ framework::dataset::make("Expected", { true, false, false, false, false, true})),
input_info, output_info, policy, shift, expected)
{
ARM_COMPUTE_EXPECT(bool(CLDepthConvertLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), policy, shift)) == expected, framework::LogLevel::ERRORS);
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index c88f7c1624..d4dbcec9d9 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,6 +41,9 @@ namespace test
{
namespace validation
{
+
+using framework::dataset::make;
+
namespace
{
RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
@@ -48,16 +51,47 @@ constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**<
constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
constexpr float tolerance_num = 0.05f; /**< Tolerance number */
-const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 5 });
-const auto large_depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 5, 8 });
+const auto depth_multipliers = make("DepthMultiplier", { 1, 4 });
+const auto large_depth_multipliers = make("DepthMultiplier", { 2, 5, 8 });
-//Activation Functions
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+// Activation Functions
+const auto NoActivation = make("ActivationInfo", ActivationLayerInfo());
+
+const auto ActivationFunctionsSmallDataset = make("ActivationInfo",
{
ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 2.f, 0.f)
+});
+
+const auto ActivationFunctionsDataset = make("ActivationInfo",
+{
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.8f, -0.5f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU)
+});
+
+const auto ActivationFunctionsQuantizedSmallDataset = make("ActivationInfo",
+{
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 2.f, 0.f)
+});
+
+const auto ActivationFunctionsQuantizedDataset = make("ActivationInfo",
+{
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f, 0.f)
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 2.3f, -1.5f),
});
+
+const auto IgnoredQuantizationInfo = make("IgnoredQuantizationInfo", QuantizationInfo());
+
} // namespace
TEST_SUITE(CL)
@@ -65,85 +99,85 @@ TEST_SUITE(DepthwiseConvolutionLayer)
// *INDENT-OFF*
// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights
- TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32), // Mismatching input feature maps
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching depth multiplier
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size
- TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // patch size bigger than input width
- TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // dilation < 1
- TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::QASYMM8),
- }),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
- TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
- TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
- TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8),
- })),
- framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
- TensorInfo(TensorShape(2U), 1, DataType::F32),
- TensorInfo(TensorShape(2U), 1, DataType::F32),
- TensorInfo(TensorShape(4U), 1, DataType::F32),
- TensorInfo(TensorShape(2U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(2U), 1, DataType::F32),
- TensorInfo(TensorShape(16U), 1, DataType::F32),
- TensorInfo(TensorShape(16U), 1, DataType::F32),
- TensorInfo(TensorShape(16U), 1, DataType::F32),
- TensorInfo(TensorShape(24U), 1, DataType::S32),
- })),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
- TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
- TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::QASYMM8),
- })),
- framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(1, 1, 1, 0),
- })),
- framework::dataset::make("DepthMultiplier", { 1,
- 1,
- 3,
- 1,
- 1,
- 1,
- 2,
- 2,
- 2,
- 3,
- })),
- framework::dataset::make("Dilation", { Size2D(1U, 1U),
- Size2D(1U, 1U),
- Size2D(1U, 1U),
- Size2D(1U, 1U),
- Size2D(1U, 1U),
- Size2D(1U, 1U),
- Size2D(20U, 1U),
- Size2D(0U, 1U),
- Size2D(1U, 1U),
- Size2D(1U, 1U),
- })),
- framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, true, true })),
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+ make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights
+ TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32), // Mismatching input feature maps
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching depth multiplier
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size
+ TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // patch size bigger than input width
+ TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // dilation < 1
+ TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::QASYMM8),
+ }),
+ make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8),
+ }),
+ make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(24U), 1, DataType::S32),
+ }),
+ make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::QASYMM8),
+ }),
+ make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 1, 0),
+ }),
+ make("DepthMultiplier", { 1,
+ 1,
+ 3,
+ 1,
+ 1,
+ 1,
+ 2,
+ 2,
+ 2,
+ 3,
+ }),
+ make("Dilation", { Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(20U, 1U),
+ Size2D(0U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ }),
+ make("Expected", { false, false, false, false, false, false, false, false, true, true })),
input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier, dilation, expected)
{
bool is_valid = bool(CLDepthwiseConvolutionLayer::validate(&input_info.clone()->set_is_resizable(true), &weights_info.clone()->set_is_resizable(true), &biases_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), conv_info, depth_multiplier,ActivationLayerInfo(), dilation));
@@ -156,48 +190,32 @@ template <typename T>
using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>;
template <typename T>
using CLDepthwiseConvolutionLayerMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T, true>;
+template <typename T>
+using CLDepthwiseConvolutionLayerInPlaceFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T, false, true>;
TEST_SUITE(Float)
TEST_SUITE(FP16)
TEST_SUITE(W3x3)
TEST_SUITE(NCHW)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
- datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
- depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F16)),
- framework::dataset::make("DataLayout", DataLayout::NCHW)),
- ActivationFunctionsDataset))
-{
- validate(CLAccessor(_target), _reference, tolerance_f16);
-}
-FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
- large_depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F16)),
- framework::dataset::make("DataLayout", DataLayout::NCHW)),
- ActivationFunctionsDataset))
+ combine(
+ framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+ datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
+ depth_multipliers,
+ make("DataType", DataType::F16),
+ make("DataLayout", DataLayout::NCHW),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f16);
}
TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
- depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
-{
- validate(CLAccessor(_target), _reference, tolerance_f16);
-}
-FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
- large_depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL,
+ combine(
+ datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+ depth_multipliers,
+ make("DataType", DataType::F16),
+ make("DataLayout", { DataLayout::NCHW }),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f16);
}
@@ -206,41 +224,42 @@ TEST_SUITE_END() // NCHW
TEST_SUITE(NHWC)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
- depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F16)),
- framework::dataset::make("DataLayout", DataLayout::NHWC)),
- ActivationFunctionsDataset))
+ combine(
+ datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+ depth_multipliers,
+ make("DataType", DataType::F16),
+ make("DataLayout", DataLayout::NHWC),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
- large_depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F16)),
- framework::dataset::make("DataLayout", DataLayout::NHWC)),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(
+ datasets::LargeDepthwiseConvolutionLayerDataset3x3Fp16Subset(),
+ large_depth_multipliers,
+ make("DataType", DataType::F16),
+ make("DataLayout", DataLayout::NHWC),
+ make("ActivationInfo", ActivationLayerInfo())))
{
validate(CLAccessor(_target), _reference, tolerance_f16);
}
TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f16);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+ combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3Fp16Subset(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", ActivationLayerInfo())))
{
validate(CLAccessor(_target), _reference, tolerance_f16);
}
@@ -251,19 +270,33 @@ TEST_SUITE_END() // W3x3
TEST_SUITE(Generic)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
}
-FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDatasetFp16Subset(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", ActivationLayerInfo())))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("In", TensorShape(33U, 27U, 11U, 3U)),
+ make("Weights", Size2D(3U, 4U)),
+ make("Info", PadStrideInfo(1, 2, 0, 1)),
+ make("Dilation", Size2D(2U, 2U)),
+ make("DepthMultiplier", { 2 }),
+ make("DataType", DataType::F16),
+ make("DataLayout", { DataLayout::NHWC }),
+ ActivationFunctionsDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
}
@@ -271,25 +304,38 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, f
TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
+ combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDatasetFp16Subset(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", ActivationLayerInfo())))
{
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
}
TEST_SUITE_END() // Dilation
TEST_SUITE_END() // Generic
+
+TEST_SUITE(InPlace)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerInPlaceFixture<half>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(datasets::SmallInPlaceDepthwiseConvolutionLayerDataset(),
+ make("DepthMultiplier", { 1 })),
+ make("DataType",
+ DataType::F16)),
+ make("DataLayout", { DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
+{
+ validate(CLAccessor(_src), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END() // InPlace
TEST_SUITE_END() // FP16
TEST_SUITE(FP32)
@@ -299,94 +345,76 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>,
combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", DataLayout::NCHW)),
- ActivationFunctionsDataset))
-{
- validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
- large_depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F32)),
- framework::dataset::make("DataLayout", DataLayout::NCHW)),
- ActivationFunctionsDataset))
+ make("DataLayout", DataLayout::NCHW)),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F32)),
- framework::dataset::make("DataLayout", DataLayout::NCHW)),
- ActivationFunctionsDataset))
-{
- validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
- large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", DataLayout::NCHW)),
- ActivationFunctionsDataset))
+ make("DataLayout", DataLayout::NCHW)),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-
TEST_SUITE_END() // Dilation
TEST_SUITE_END() // NCHW
+
TEST_SUITE(NHWC)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", DataLayout::NHWC)),
- ActivationFunctionsDataset))
+ make("DataLayout", DataLayout::NHWC)),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, CLDepthwiseConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", { 2 })),
- framework::dataset::make("DataType",
+ make("DepthMultiplier", { 2 })),
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", DataLayout::NHWC)),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo())))
+ make("DataLayout", DataLayout::NHWC)),
+ make("ActivationInfo", ActivationLayerInfo())))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", DataLayout::NHWC)),
- ActivationFunctionsDataset))
+ make("DataLayout", DataLayout::NHWC)),
+ make("ActivationInfo", ActivationLayerInfo())))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
+
TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", DataLayout::NHWC)),
- ActivationFunctionsDataset))
+ make("DataLayout", DataLayout::NHWC)),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", DataLayout::NHWC)),
- ActivationFunctionsDataset))
+ make("DataLayout", DataLayout::NHWC)),
+ make("ActivationInfo", ActivationLayerInfo())))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
@@ -397,19 +425,45 @@ TEST_SUITE_END() // W3x3
TEST_SUITE(Generic)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
+
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", ActivationLayerInfo())))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE_NEW(RunLargeKernelSize, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(datasets::LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset(),
+ make("DepthMultiplier", { 1 })),
+ make("DataType",
+ DataType::F32)),
+ make("DataLayout", { DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("In", TensorShape(33U, 27U, 11U, 3U)),
+ make("Weights", Size2D(3U, 4U)),
+ make("Info", PadStrideInfo(1, 2, 0, 1)),
+ make("Dilation", Size2D(2U, 2U)),
+ make("DepthMultiplier", { 2 }),
+ make("DataType", DataType::F32),
+ make("DataLayout", { DataLayout::NHWC }),
+ ActivationFunctionsDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
@@ -417,25 +471,38 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>,
TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", ActivationLayerInfo())))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
TEST_SUITE_END() // Dilation
TEST_SUITE_END() // Generic
+
+TEST_SUITE(InPlace)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerInPlaceFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(datasets::SmallInPlaceDepthwiseConvolutionLayerDataset(),
+ make("DepthMultiplier", { 1 })),
+ make("DataType",
+ DataType::F32)),
+ make("DataLayout", { DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
+{
+ validate(CLAccessor(_src), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // InPlace
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
@@ -450,47 +517,84 @@ TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
TEST_SUITE(Generic)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
- depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })), // NCHW is tested with int8
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }), // NCHW is tested with int8
+ NoActivation))
+{
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 128), QuantizationInfo(2.2f, 10) }),
+ make("DstQuantizationInfo", { QuantizationInfo(1.f, 128) }),
+ make("DataLayout", { DataLayout::NHWC }), // NCHW is tested with int8
+ ActivationFunctionsQuantizedSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
- large_depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 2) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+ large_depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
+{
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("In", TensorShape(33U, 27U, 11U, 3U)),
+ make("Weights", Size2D(3U, 4U)),
+ make("Info", PadStrideInfo(1, 2, 0, 1)),
+ make("Dilation", Size2D(2U, 2U)),
+ make("DepthMultiplier", { 2U }),
+ make("DataType", DataType::QASYMM8),
+ make("SrcQuantizationInfo", { QuantizationInfo(2.2f, 10) }),
+ make("DstQuantizationInfo", { QuantizationInfo(0.1f, 128) }),
+ make("DataLayout", { DataLayout::NHWC }),
+ ActivationFunctionsQuantizedDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
- depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })), // NCHW is tested with int8
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }), // NCHW is tested with int8
+ NoActivation))
+{
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) }),
+ make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) }),
+ make("DataLayout", { DataLayout::NHWC }), // NCHW is tested with int8
+ ActivationFunctionsQuantizedSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
- large_depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(1.3f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
+ large_depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
@@ -498,58 +602,80 @@ TEST_SUITE_END() // Dilation
TEST_SUITE_END() // Generic
TEST_SUITE(W3x3)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
- depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
+{
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) }),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }),
+ make("DataLayout", { DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
- large_depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+ large_depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
- depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ NoActivation))
+{
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) }),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, CLDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", { 2 })),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo())))
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+ make("DepthMultiplier", { 2 }),
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ NoActivation))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
- large_depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+ large_depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
@@ -560,36 +686,73 @@ TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
TEST_SUITE(Generic)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
- depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW }),
+ NoActivation))
+{
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) }),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }),
+ make("DataLayout", { DataLayout::NCHW }),
+ ActivationFunctionsQuantizedSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, CLDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
- framework::dataset::make("DepthMultiplier", { 2 })),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo())))
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+ make("DepthMultiplier", { 2 }),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW }),
+ NoActivation))
+{
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("In", TensorShape(33U, 27U, 11U, 3U)),
+ make("Weights", Size2D(3U, 4U)),
+ make("Info", PadStrideInfo(1, 2, 0, 1)),
+ make("Dilation", Size2D(2U, 2U)),
+ make("DepthMultiplier", { 2U }),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) }),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }),
+ make("DataLayout", { DataLayout::NHWC }),
+ ActivationFunctionsQuantizedDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
- depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW }),
+ NoActivation))
+{
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) }),
+ make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) }),
+ make("DataLayout", { DataLayout::NCHW }),
+ ActivationFunctionsQuantizedSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
@@ -602,24 +765,40 @@ TEST_SUITE(Generic)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("SrcDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("SrcDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
large_depth_multipliers),
- framework::dataset::make("SrcDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 2) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("SrcDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+ make("DstQuantizationInfo", { QuantizationInfo(0.7f, 2) })),
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", ActivationLayerInfo())))
+{
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("In", TensorShape(33U, 27U, 11U, 3U)),
+ make("Weights", Size2D(3U, 4U)),
+ make("Info", PadStrideInfo(1, 2, 0, 1)),
+ make("Dilation", Size2D(2U, 2U)),
+ make("DepthMultiplier", { 2U }),
+ make("SrcDataType", DataType::QASYMM8_SIGNED),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) }),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }),
+ make("DataLayout", { DataLayout::NHWC }),
+ ActivationFunctionsQuantizedDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
@@ -627,24 +806,24 @@ TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("SrcDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("SrcDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+ make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
large_depth_multipliers),
- framework::dataset::make("SrcDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("SrcDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+ make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })),
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", ActivationLayerInfo())))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
@@ -654,24 +833,24 @@ TEST_SUITE(W3x3)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
depth_multipliers),
- framework::dataset::make("SrcDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("SrcDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
large_depth_multipliers),
- framework::dataset::make("SrcDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("SrcDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", ActivationLayerInfo())))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
@@ -679,24 +858,24 @@ TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
depth_multipliers),
- framework::dataset::make("SrcDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("SrcDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
large_depth_multipliers),
- framework::dataset::make("SrcDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
- framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("SrcDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", ActivationLayerInfo())))
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
diff --git a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp
index f640ee2b18..012018c0fc 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,6 +42,7 @@ namespace test
{
namespace validation
{
+using framework::dataset::make;
using namespace arm_compute::misc::shape_calculator;
// Create function for CLDepthwiseConvolutionLayerNativeKernel
@@ -62,64 +63,89 @@ RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.01f));
constexpr float abs_tolerance_f16(0.03f);
/** Width values to test - Precommit */
-const auto width_values_precommit = framework::dataset::make("width", { 1U, 17U, 32U } );
+const auto width_values_precommit = make("width", { 1U, 33U } );
/** Width values to test - Nightly */
-const auto width_values_nightly = framework::dataset::make("width", { 53U, 47U } );
+const auto width_values_nightly = make("width", { 53U, 47U } );
/** Height values to test - Precommit */
-const auto height_values_precommit = framework::dataset::make("height", { 19U } );
+const auto height_values_precommit = make("height", { 19U } );
/** Height values to test - Nightly */
-const auto height_values_nightly = framework::dataset::make("height", { 39U, 43U } );
+const auto height_values_nightly = make("height", { 39U, 43U } );
/** Channel values to test - Precommit */
-const auto channel_values_precommit = framework::dataset::make("channels", { 15U });
+const auto channel_values_precommit = make("channels", { 15U });
/** Channel values to test - Nightly */
-const auto channel_values_nightly = framework::dataset::make("channels", { 33U, 19U });
+const auto channel_values_nightly = make("channels", { 33U, 19U });
+
+/** Channel values to test with cl_image support - Precommit */
+const auto channel_values_export_to_cl_image_precommit = make("channels", { 16U });
+
+/** Channel values to test with cl_image support - Nightly */
+const auto channel_values_export_to_cl_image_nightly = make("channels", { 32U });
/** Batch values to test - Precommit */
-const auto batch_values_precommit = framework::dataset::make("batch", { 1U, 2U });
+const auto batch_values_precommit = make("batch", { 1U, 2U });
/** Batch values to test - Nightly */
-const auto batch_values_nightly = framework::dataset::make("batch", { 1U, 3U });
+const auto batch_values_nightly = make("batch", { 3U });
/** Kernel size values to test - Precommit */
-const auto kernel_sz_values_precommit = framework::dataset::make("kernel_size", { Size2D(1U, 1U), Size2D(1U, 3U), Size2D(5U, 5U) });
+const auto kernel_sz_values_precommit = make("kernel_size", { Size2D(1U, 1U), Size2D(1U, 3U), Size2D(5U, 5U) });
/** Kernel size values to test - Nightly */
-const auto kernel_sz_values_nightly = framework::dataset::make("kernel_size", { Size2D(3U, 5U), Size2D(5U, 1U), Size2D(1U, 7U), Size2D(9U, 7U) });
+const auto kernel_sz_values_nightly = make("kernel_size", { Size2D(3U, 5U), Size2D(5U, 1U), Size2D(1U, 7U), Size2D(9U, 7U) });
/** Depth multiplier values to test - All */
-const auto depth_multiplier_values = framework::dataset::make("depth_multiplier", {3U});
+const auto depth_multiplier_values = make("depth_multiplier", {3U});
/** Dilation values to test - All */
-const auto dilation_values = framework::dataset::make("dilation", { Size2D(1U, 1U), Size2D(3U, 3U) });
+const auto dilation_values = make("dilation", { Size2D(1U, 1U), Size2D(3U, 3U) });
/** Stride values to test - All */
-const auto stride_values = framework::dataset::make("stride", { Size2D(1U, 1U), Size2D(3U, 2U) });
+const auto stride_values = make("stride", { Size2D(1U, 1U), Size2D(3U, 2U) });
-/** Padding values to test - All */
-const auto padding_valid_values = framework::dataset::make("padding_valid", { true, false });
+/** Padding values to test - Precommit */
+const auto padding_valid_values = make("padding_valid", { true, false });
-/** Data type values to test - All */
-const auto data_type_values = framework::dataset::make("data_type", { DataType::F32, DataType::F16 });
+/** Padding values to test - Nightly */
+const auto padding_valid_values_nightly = make("padding_valid", { false });
/** Data layout values to test - All */
-const auto data_layout_values = framework::dataset::make("data_layout", { DataLayout::NHWC });
+const auto data_layout_values = make("data_layout", { DataLayout::NHWC });
/** N0 values to test - Precommit */
-const auto n0_values_precommit = framework::dataset::make("N0", {2, 4});
+const auto n0_values_precommit = make("N0", {2, 4});
/** N0 values to test - Nightly */
-const auto n0_values_nightly = framework::dataset::make("N0", {3, 8});
+const auto n0_values_nightly = make("N0", {3, 8});
+
+/** N0 values to test with cl_image support - Precommit */
+const auto n0_values_export_to_cl_image_precommit = make("N0", {4});
+
+/** N0 values to test with cl_image support - Nightly */
+const auto n0_values_export_to_cl_image_nightly = make("N0", {8});
-/** Activation values to test */
-const auto act_values = framework::dataset::make("Activation",
+/** Activation values to test - Precommit */
+const auto act_values = make("Activation", { ActivationLayerInfo() });
+
+const auto activations_rest = make("Activation",
{
- ActivationLayerInfo(),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f, 0.5f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.8f, -0.5f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU)
});
} // namespace
@@ -129,94 +155,264 @@ TEST_SUITE(DepthwiseConvolutionLayerNative)
TEST_SUITE(Float)
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
width_values_precommit,
height_values_precommit),
channel_values_precommit),
batch_values_precommit),
kernel_sz_values_precommit),
- framework::dataset::make("depth_multiplier", 1)),
+ make("depth_multiplier", 1)),
dilation_values),
stride_values),
padding_valid_values),
- framework::dataset::make("DataType", DataType::F32)),
+ make("DataType", DataType::F32)),
data_layout_values),
act_values),
- n0_values_precommit))
+ n0_values_precommit),
+ make("ExportToCLImage", false)))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
width_values_nightly,
height_values_nightly),
channel_values_nightly),
batch_values_nightly),
kernel_sz_values_nightly),
- framework::dataset::make("depth_multiplier", 1)),
+ make("depth_multiplier", 1)),
dilation_values),
stride_values),
- padding_valid_values),
- framework::dataset::make("DataType", DataType::F32)),
+ padding_valid_values_nightly),
+ make("DataType", DataType::F32)),
data_layout_values),
- act_values),
- n0_values_nightly))
+ make("Activation", { ActivationLayerInfo() })),
+ n0_values_nightly),
+ make("ExportToCLImage", false)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
+ combine(
+ make("width", { 33U } ),
+ height_values_precommit,
+ channel_values_precommit,
+ make("batch", { 2U } ),
+ make("kernel_size", { Size2D(5U, 5U) }),
+ make("depth_multiplier", 1),
+ make("dilation", Size2D(3U, 3U)),
+ make("stride", Size2D(3U, 2U)),
+ padding_valid_values_nightly,
+ make("DataType", DataType::F32),
+ data_layout_values,
+ activations_rest,
+ n0_values_precommit,
+ make("ExportToCLImage", false)))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}
+
+TEST_SUITE(ExportWeightsToCLImage)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ width_values_precommit,
+ height_values_precommit),
+ channel_values_export_to_cl_image_precommit),
+ batch_values_precommit),
+ kernel_sz_values_precommit),
+ make("depth_multiplier", 1)),
+ dilation_values),
+ stride_values),
+ padding_valid_values),
+ make("DataType", DataType::F32)),
+ data_layout_values),
+ act_values),
+ n0_values_export_to_cl_image_precommit),
+ make("ExportToCLImage", true)))
+{
+ // Validate output
+ if(_validate_output)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ width_values_nightly,
+ height_values_nightly),
+ channel_values_export_to_cl_image_nightly),
+ batch_values_nightly),
+ kernel_sz_values_nightly),
+ make("depth_multiplier", 1)),
+ dilation_values),
+ stride_values),
+ padding_valid_values_nightly),
+ make("DataType", DataType::F32)),
+ data_layout_values),
+ make("Activation", { ActivationLayerInfo() })),
+ n0_values_export_to_cl_image_nightly),
+ make("ExportToCLImage", true)))
+{
+    // Validate output only when export to CLImage is supported
+ if(_validate_output)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+
+TEST_SUITE_END() // ExportWeightsToCLImage
TEST_SUITE_END() // FP32
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
width_values_precommit,
height_values_precommit),
channel_values_precommit),
batch_values_precommit),
kernel_sz_values_precommit),
- framework::dataset::make("depth_multiplier", 1)),
+ make("depth_multiplier", 1)),
dilation_values),
stride_values),
padding_valid_values),
- framework::dataset::make("DataType", DataType::F16)),
+ make("DataType", DataType::F16)),
data_layout_values),
act_values),
- n0_values_precommit))
+ n0_values_precommit),
+ make("ExportToCLImage", false)))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16);
}
-
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- width_values_nightly,
- height_values_nightly),
- channel_values_nightly),
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ make("width", { 47U } ),
+ make("height", { 39U } )),
+ make("channels", { 19U } )),
batch_values_nightly),
- kernel_sz_values_nightly),
- framework::dataset::make("depth_multiplier", 1)),
+ make("kernel_size", { Size2D(5U, 5U) })),
+ make("depth_multiplier", 1)),
+ make("dilation", { Size2D(3U, 3U) })),
+ make("stride", { Size2D(3U, 2U) })),
+ padding_valid_values_nightly),
+ make("DataType", DataType::F16)),
+ data_layout_values),
+ make("Activation", { ActivationLayerInfo() })),
+ n0_values_nightly),
+ make("ExportToCLImage", false)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
+ combine(
+ make("width", { 33U } ),
+ height_values_precommit,
+ channel_values_precommit,
+ make("batch", { 2U } ),
+ make("kernel_size", { Size2D(5U, 5U) }),
+ make("depth_multiplier", 4),
+ make("dilation", Size2D(3U, 3U)),
+ make("stride", Size2D(3U, 2U)),
+ padding_valid_values_nightly,
+ make("DataType", DataType::F16),
+ data_layout_values,
+ activations_rest,
+ n0_values_precommit,
+ make("ExportToCLImage", false)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+}
+
+TEST_SUITE(ExportWeightsToCLImage)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ width_values_precommit,
+ height_values_precommit),
+ channel_values_export_to_cl_image_precommit),
+ batch_values_precommit),
+ kernel_sz_values_precommit),
+ make("depth_multiplier", 1)),
dilation_values),
stride_values),
padding_valid_values),
- framework::dataset::make("DataType", DataType::F16)),
+ make("DataType", DataType::F16)),
data_layout_values),
act_values),
- n0_values_nightly))
+ n0_values_export_to_cl_image_precommit),
+ make("ExportToCLImage", true)))
{
- // Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    // Validate output only when export to CLImage is supported
+ if(_validate_output)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ make("width", { 47U } ),
+ make("height", { 39U } )),
+ channel_values_export_to_cl_image_nightly),
+ batch_values_nightly),
+ make("kernel_size", { Size2D(5U, 5U) })),
+ make("depth_multiplier", 1)),
+ make("dilation", { Size2D(3U, 3U) })),
+ make("stride", { Size2D(3U, 2U) })),
+ padding_valid_values_nightly),
+ make("DataType", DataType::F16)),
+ data_layout_values),
+ make("Activation", { ActivationLayerInfo() })),
+ n0_values_export_to_cl_image_nightly),
+ make("ExportToCLImage", true)))
+{
+    // Validate output only when export to CLImage is supported
+ if(_validate_output)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
+TEST_SUITE_END() // ExportWeightsToCLImage
TEST_SUITE_END() // FP16
TEST_SUITE_END() // Float
+
TEST_SUITE(DepthMultiplier)
TEST_SUITE(Float)
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- width_values_precommit,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ make("width", { 33U } ),
height_values_precommit),
channel_values_precommit),
batch_values_precommit),
@@ -225,18 +421,19 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<fl
dilation_values),
stride_values),
padding_valid_values),
- framework::dataset::make("DataType", DataType::F32)),
+ make("DataType", DataType::F32)),
data_layout_values),
act_values),
- framework::dataset::make("N0", 1)))
+ make("N0", 1)),
+ make("ExportToCLImage", false)))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- width_values_nightly,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ make("width", { 53U } ),
height_values_nightly),
channel_values_nightly),
batch_values_nightly),
@@ -244,21 +441,77 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<fl
depth_multiplier_values),
dilation_values),
stride_values),
+ padding_valid_values_nightly),
+ make("DataType", DataType::F32)),
+ data_layout_values),
+ make("Activation", { ActivationLayerInfo() })),
+ make("N0", 1)),
+ make("ExportToCLImage", false)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+TEST_SUITE(DepthMultiplierMultipleOfOutputChannels)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ make("width", { 33U } ),
+ height_values_precommit),
+ channel_values_precommit),
+ batch_values_precommit),
+ kernel_sz_values_precommit),
+ make("depth_multiplier", 2)),
+ dilation_values),
+ stride_values),
padding_valid_values),
- framework::dataset::make("DataType", DataType::F32)),
+ make("DataType", DataType::F32)),
data_layout_values),
act_values),
- framework::dataset::make("N0", 1)))
+ make("N0", {2})),
+ make("ExportToCLImage", false)))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}
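
As a quick worked example of the channel arithmetic behind this suite (illustrative numbers; the pairing of N0 with the depth multiplier is an observation from the datasets above rather than a documented rule): with a depth multiplier M, every input channel produces M output channels.

    // Illustrative values only.
    constexpr unsigned int input_channels   = 15;                                // example value
    constexpr unsigned int depth_multiplier = 2;                                 // as in the dataset above
    constexpr unsigned int output_channels  = input_channels * depth_multiplier; // 30 output channels
    // N0 = 2 matches the depth multiplier here, so a block of N0 output channels never
    // straddles the channels produced by two different input channels (assumed rationale).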
+
+TEST_SUITE(ExportWeightsToCLImage)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ make("width", { 33U } ),
+ height_values_precommit),
+ channel_values_precommit),
+ batch_values_precommit),
+ kernel_sz_values_precommit),
+ make("depth_multiplier", 4)),
+ dilation_values),
+ stride_values),
+ padding_valid_values),
+ make("DataType", DataType::F32)),
+ data_layout_values),
+ act_values),
+ make("N0", {4})),
+ make("ExportToCLImage", true)))
+{
+    // Validate output only when export to CLImage is supported
+ if(_validate_output)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+TEST_SUITE_END() // ExportWeightsToCLImage
+TEST_SUITE_END() // DepthMultiplierMultipleOfOutputChannels
TEST_SUITE_END() // FP32
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- width_values_precommit,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ make("width", { 33U } ),
height_values_precommit),
channel_values_precommit),
batch_values_precommit),
@@ -267,18 +520,19 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<ha
dilation_values),
stride_values),
padding_valid_values),
- framework::dataset::make("DataType", DataType::F16)),
+ make("DataType", DataType::F16)),
data_layout_values),
act_values),
- framework::dataset::make("N0", 1)))
+ make("N0", 1)),
+ make("ExportToCLImage", false)))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- width_values_nightly,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ make("width", { 53U } ),
height_values_nightly),
channel_values_nightly),
batch_values_nightly),
@@ -286,15 +540,71 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<ha
depth_multiplier_values),
dilation_values),
stride_values),
+ padding_valid_values_nightly),
+ make("DataType", DataType::F16)),
+ data_layout_values),
+ make("Activation", { ActivationLayerInfo() })),
+ make("N0", 1)),
+ make("ExportToCLImage", false)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+}
+
+TEST_SUITE(DepthMultiplierMultipleOfOutputChannels)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ make("width", { 33U } ),
+ height_values_precommit),
+ channel_values_precommit),
+ batch_values_precommit),
+ kernel_sz_values_precommit),
+ make("depth_multiplier", 2)),
+ dilation_values),
+ stride_values),
padding_valid_values),
- framework::dataset::make("DataType", DataType::F16)),
+ make("DataType", DataType::F16)),
data_layout_values),
act_values),
- framework::dataset::make("N0", 1)))
+ make("N0", {2})),
+ make("ExportToCLImage", false)))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
}
+
+TEST_SUITE(ExportWeightsToCLImage)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ make("width", { 33U } ),
+ height_values_precommit),
+ channel_values_precommit),
+ batch_values_precommit),
+ kernel_sz_values_precommit),
+ make("depth_multiplier", 4)),
+ dilation_values),
+ stride_values),
+ padding_valid_values),
+ make("DataType", DataType::F16)),
+ data_layout_values),
+ act_values),
+ make("N0", {4})),
+ make("ExportToCLImage", true)))
+{
+    // Validate output only when export to CLImage is supported
+ if(_validate_output)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+TEST_SUITE_END() // ExportWeightsToCLImage
+TEST_SUITE_END() // DepthMultiplierMultipleOfOutputChannels
TEST_SUITE_END() // FP16
TEST_SUITE_END() // Float
TEST_SUITE_END() // DepthMultiplier
diff --git a/tests/validation/CL/DilatedConvolutionLayer.cpp b/tests/validation/CL/DilatedConvolutionLayer.cpp
index 9a9df2c7e4..776bf34151 100644
--- a/tests/validation/CL/DilatedConvolutionLayer.cpp
+++ b/tests/validation/CL/DilatedConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -167,13 +167,18 @@ template <typename T>
using CLGEMMDilatedConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
TEST_SUITE(Quantized)
+/// @note: Every asymmetric quantized test without a fused activation has its quantization info ignored.
+/// This is because, instead of using the same quantization information for all the tensors, the fixture generates
+/// separate quantization info for each input and for the output tensor.
+/// Once dynamic quantization is also supported in the presence of an activation, the explicit
+/// quantization info can be removed.
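
For illustration only (not part of the patch), the dynamic quantization described above boils down to drawing independent parameters per tensor. A rough sketch with a hypothetical helper, not the fixture's actual implementation:

    #include "arm_compute/core/QuantizationInfo.h"
    #include <random>

    // Draw a fresh QuantizationInfo so that src, weights and dst no longer share
    // the same scale/offset; the ranges below are arbitrary illustration values.
    arm_compute::QuantizationInfo make_random_qinfo(std::mt19937 &gen)
    {
        std::uniform_real_distribution<float> scale_dist(0.1f, 2.0f);
        std::uniform_int_distribution<int>    offset_dist(0, 255);
        return arm_compute::QuantizationInfo(scale_dist(gen), offset_dist(gen));
    }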
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(),
framework::dataset::make("ReshapeWeights", { true })),
framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("IgnoredQuantizationInfo", { QuantizationInfo() })),
framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })))
{
// Validate output
@@ -185,7 +190,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMDilatedConvolutionLayerQuantizedFixture<u
framework::dataset::make("ReshapeWeights", { true })),
framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 0) })),
+ framework::dataset::make("IgnoredQuantizationInfo", { QuantizationInfo() })),
framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })))
{
// Validate output
diff --git a/tests/validation/CL/DirectConvolutionLayer.cpp b/tests/validation/CL/DirectConvolutionLayer.cpp
index a057f48c87..ff22ae5ef0 100644
--- a/tests/validation/CL/DirectConvolutionLayer.cpp
+++ b/tests/validation/CL/DirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,6 +35,9 @@
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/DirectConvolutionLayerFixture.h"
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp
+ * Please check there for any differences in the coverage
+ */
namespace arm_compute
{
namespace test
@@ -43,10 +46,12 @@ namespace validation
{
namespace
{
-RelativeTolerance<half> tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */
-RelativeTolerance<float> tolerance_fp32(0.05f); /**< Tolerance for floating point tests */
-constexpr float tolerance_num = 0.07f; /**< Tolerance number */
-constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance for quantized tests */
+RelativeTolerance<half> tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */
+RelativeTolerance<float> tolerance_fp32(0.05f); /**< Tolerance for floating point tests */
+constexpr float abs_tolerance_f32(0.0001f); /**< Absolute tolerance for FP32 tests*/
+
+constexpr float tolerance_num = 0.07f; /**< Tolerance number */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance for quantized tests */
const auto data_strides = combine(framework::dataset::make("StrideX", 1, 3), framework::dataset::make("StrideY", 1, 3));
const auto data_strides_small = combine(framework::dataset::make("StrideX", 1), framework::dataset::make("StrideY", 1));
@@ -130,55 +135,89 @@ TEST_CASE(NoBias, framework::DatasetMode::PRECOMMIT)
validate(CLAccessor(dst), ref_dst);
}
+/** Check that the rectangular-kernel case, i.e. when the width and height of the weight shape are not equal,
+ * runs successfully
+ */
+TEST_CASE(NonSquareKernel, framework::DatasetMode::PRECOMMIT)
+{
+ auto src_shape = TensorShape(33U, 27U, 3U);
+ auto weights_shape = TensorShape(5U, 7U, 3U, 4U); // non-square kernel
+ const auto bias_shape = TensorShape(4U);
+ auto dst_shape = TensorShape(11U, 12U, 4U);
+ constexpr auto dt = DataType::F32;
+
+ TensorShape src_shape_nhwc(src_shape);
+ TensorShape weights_shape_nhwc(weights_shape);
+ TensorShape dst_shape_nhwc(dst_shape);
+
+ // Non-square shapes are only allowed for NHWC
+ permute(src_shape_nhwc, PermutationVector(2U, 0U, 1U));
+ permute(weights_shape_nhwc, PermutationVector(2U, 0U, 1U));
+ permute(dst_shape_nhwc, PermutationVector(2U, 0U, 1U));
+
+ auto src = create_tensor<CLTensor>(src_shape_nhwc, dt, 1, QuantizationInfo(), DataLayout::NHWC);
+ auto weights = create_tensor<CLTensor>(weights_shape_nhwc, dt, 1, QuantizationInfo(), DataLayout::NHWC);
+ auto dst = create_tensor<CLTensor>(dst_shape_nhwc, dt, 1, QuantizationInfo(), DataLayout::NHWC);
+ const auto conv_info = PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR);
+
+ // Create direct convolution function
+ CLDirectConvolutionLayer conv{};
+ conv.configure(&src, &weights, nullptr, &dst, conv_info);
+
+ src.allocator()->allocate();
+ weights.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ library->fill_tensor_value(CLAccessor(src), 1.f);
+ library->fill_tensor_value(CLAccessor(weights), 1.f);
+
+ conv.run();
+
+ // Compute reference to compare
+ SimpleTensor<float> ref_src{ src_shape, dt };
+ SimpleTensor<float> ref_weights{ weights_shape, dt };
+ SimpleTensor<float> ref_bias{ bias_shape, dt };
+ library->fill_tensor_value(ref_src, 1.f);
+ library->fill_tensor_value(ref_weights, 1.f);
+ // No bias
+ library->fill_tensor_value(ref_bias, 0.f);
+ auto ref_dst = reference::convolution_layer<float>(ref_src, ref_weights, ref_bias, dst_shape, conv_info);
+
+ validate(CLAccessor(dst), ref_dst);
+}
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching input feature maps
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported kernel width
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Non-rectangular weights dimensions
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid: Mismatching data type input/weights
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid: Mismatching input feature maps
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights dimensions
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid stride
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported biases size
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported biases dimensions
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
}),
framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16),
TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32),
- TensorInfo(TensorShape(11U, 11U, 2U, 4U), 1, DataType::F32),
- TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 4U, 3U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
- TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32),
})),
framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
- TensorInfo(TensorShape(4U), 1, DataType::F32),
- TensorInfo(TensorShape(4U), 1, DataType::F32),
- TensorInfo(TensorShape(4U), 1, DataType::F32),
TensorInfo(TensorShape(3U), 1, DataType::F32),
TensorInfo(TensorShape(4U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
- TensorInfo(TensorShape(4U), 1, DataType::F32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(26U, 11U, 4U), 1, DataType::F32),
- TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32),
})),
framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
@@ -186,23 +225,27 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(3, 3, 0, 0),
- PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
})),
framework::dataset::make("ActivationInfo",
{
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
})),
- framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, false, true })),
+ framework::dataset::make("Expected", { false, false, false, false, false, false, true })),
input_info, weights_info, biases_info, output_info, conv_info, act_info, expected)
{
bool is_valid = bool(CLDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, act_info));
ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
}
+// clang-format on
+// *INDENT-ON*
template <typename T>
using CLDirectConvolutionLayerFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
@@ -218,6 +261,46 @@ template <typename T>
using CLDirectConvolutionValidationWithTensorShapesQuantizedFixture = DirectConvolutionValidationWithTensorShapesQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
TEST_SUITE(NHWC)
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputInfo", {
+ TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Arbitrary weight sizes for NHWC are supported
+ TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Non-rectangular weights dimensions for NHWC are supported
+ TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Strides > 2 for any kernel sizes for NHWC are supported
+ }),
+ framework::dataset::make("WeightsInfo",{
+ TensorInfo(TensorShape(2U, 13U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 5U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ })),
+ framework::dataset::make("BiasesInfo",{
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+ })),
+ framework::dataset::make("OutputInfo",{
+ TensorInfo(TensorShape(4U, 15U, 1U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U, 23U, 11U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U, 9U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ })),
+ framework::dataset::make("ConvInfo", {
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(3, 3, 0, 0),
+ })),
+ framework::dataset::make("ActivationInfo",
+{
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+})),
+ framework::dataset::make("Expected", { true, true, true })),
+ input_info, weights_info, biases_info, output_info, conv_info, act_info, expected)
+{
+ bool is_valid = bool(CLDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, act_info));
+ ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(zip(zip(zip(zip(zip(zip(
@@ -273,7 +356,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture<float>, framewo
framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
framework::dataset::make("DataLayout", DataLayout::NHWC)))
{
- validate(CLAccessor(_target), _reference, tolerance_fp32);
+ validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(zip(zip(zip(zip(zip(zip(
@@ -291,7 +374,7 @@ FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerMixedDataLayo
framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
framework::dataset::make("DataLayout", DataLayout::NHWC)))
{
- validate(CLAccessor(_target), _reference, tolerance_fp32);
+ validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(zip(zip(zip(zip(zip(zip(
@@ -306,7 +389,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture<float>, framewo
framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::IDENTITY) )),
framework::dataset::make("DataLayout", DataLayout::NHWC)))
{
- validate(CLAccessor(_target), _reference, tolerance_fp32);
+ validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
}
TEST_SUITE_END() // FP32
@@ -427,9 +510,48 @@ TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // NHWC
+TEST_SUITE(NCHW)
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputInfo", {
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, DataLayout::NCHW), // Unsupported kernel width
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, DataLayout::NCHW), // Non-rectangular weights dimensions are unsupported
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, DataLayout::NCHW) // Unsupported stride
+ }),
+ framework::dataset::make("WeightsInfo",{
+ TensorInfo(TensorShape(11U, 11U, 2U, 4U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, DataLayout::NCHW)
+ })),
+ framework::dataset::make("BiasesInfo",{
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NCHW)
+ })),
+ framework::dataset::make("OutputInfo",{
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(23U, 11U, 4U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, DataLayout::NCHW)
+ })),
+ framework::dataset::make("ConvInfo", {
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(3, 3, 0, 0)
+ })),
+ framework::dataset::make("ActivationInfo",
+{
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+})),
+ framework::dataset::make("Expected", { false, false, false})),
+ input_info, weights_info, biases_info, output_info, conv_info, act_info, expected)
+{
+ bool is_valid = bool(CLDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, act_info));
+ ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
// clang-format on
// *INDENT-ON*
-TEST_SUITE(NCHW)
+
TEST_SUITE(Float)
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType", DataType::F16)),
@@ -454,20 +576,21 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture<float>, framewo
ActivationFunctionsDataset),
framework::dataset::make("DataLayout", { DataLayout::NCHW })))
{
- validate(CLAccessor(_target), _reference, tolerance_fp32);
+ validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType",
- DataType::F32)),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit,
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ ActivationFunctionsDataset),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })))
{
- validate(CLAccessor(_target), _reference, tolerance_fp32);
+ validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly, framework::dataset::make("DataType", DataType::F32)),
ActivationFunctionsDataset),
framework::dataset::make("DataLayout", { DataLayout::NCHW })))
{
- validate(CLAccessor(_target), _reference, tolerance_fp32);
+ validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
}
TEST_SUITE_END() // FP32
@@ -477,107 +600,202 @@ FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesFixture
ActivationFunctionsDataset))
{
// Validate output
- validate(CLAccessor(_target), _reference, tolerance_fp32);
+ validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
}
TEST_SUITE_END() // FP32_CustomDataset
TEST_SUITE_END() // Float
+/// @note: Every quantized test has two versions, with and without a fused activation, because the quantization info
+/// given is ignored when there is no activation. Instead of using the same quantization information for all the
+/// tensors, the fixture generates separate quantization info for each input and for the output tensor.
+/// Once dynamic quantization is also supported in the presence of an activation, these two versions should be
+/// merged again, with the explicitly specified quantization info removed.
const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
{
- ActivationLayerInfo(),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
});
+const auto NoActivation = framework::dataset::make("ActivationInfo",
+{
+ ActivationLayerInfo()
+});
+const auto IgnoredQuantizationInfo = framework::dataset::make("IgnoredQuantizationInfo",
+{
+ QuantizationInfo()
+});
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(data_precommit,
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10)})),
- QuantizedActivationFunctionsDataset),
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit,
+ framework::dataset::make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ NoActivation,
framework::dataset::make("DataLayout", { DataLayout::NCHW })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(data_precommit,
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })),
- QuantizedActivationFunctionsDataset),
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit,
+ framework::dataset::make("DataType", DataType::QASYMM8),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10) }),
+ QuantizedActivationFunctionsDataset,
framework::dataset::make("DataLayout", { DataLayout::NCHW })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(data_precommit_9x9,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit,
+ framework::dataset::make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ NoActivation,
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit,
+ framework::dataset::make("DataType", DataType::QASYMM8),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) }),
+ QuantizedActivationFunctionsDataset,
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit_9x9,
+ framework::dataset::make("DataType",
+ DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ NoActivation,
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall9x9WithActivation, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit_9x9,
framework::dataset::make("DataType",
- DataType::QASYMM8)),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(3.f / 255, 10), QuantizationInfo(1.1f, 10) })),
- QuantizedActivationFunctionsDataset),
+ DataType::QASYMM8),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(3.f / 255, 10), QuantizationInfo(1.1f, 10) }),
+ QuantizedActivationFunctionsDataset,
framework::dataset::make("DataLayout", { DataLayout::NCHW })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(data_nightly, framework::dataset::make("DataType",
- DataType::QASYMM8)),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })),
- QuantizedActivationFunctionsDataset),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(data_nightly, framework::dataset::make("DataType",
+ DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ NoActivation,
framework::dataset::make("DataLayout", { DataLayout::NCHW })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunLarge9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(data_nightly_9x9,
+FIXTURE_DATA_TEST_CASE(RunLargeWithActivation, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(data_nightly, framework::dataset::make("DataType",
+ DataType::QASYMM8),
+                                                                                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) }),
+ QuantizedActivationFunctionsDataset,
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(data_nightly_9x9,
framework::dataset::make("DataType",
- DataType::QASYMM8)),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(3.f / 255, 10), QuantizationInfo(1.1f, 10) })),
- QuantizedActivationFunctionsDataset),
+ DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ NoActivation,
framework::dataset::make("DataLayout", { DataLayout::NCHW })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-
-TEST_SUITE_END() // QASYMM8
-
-TEST_SUITE(QASYMM8_CustomDataset)
-FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(datasets::DirectConvolutionLayerDataset(),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) })),
- QuantizedActivationFunctionsDataset),
+FIXTURE_DATA_TEST_CASE(RunLarge9x9WithActivation, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(data_nightly_9x9,
+ framework::dataset::make("DataType",
+ DataType::QASYMM8),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(3.f / 255, 10), QuantizationInfo(1.1f, 10) }),
+ QuantizedActivationFunctionsDataset,
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(CustomDataset, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(datasets::DirectConvolutionLayerDataset(),
+ framework::dataset::make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ NoActivation,
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(CustomDatasetWithActivation, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(datasets::DirectConvolutionLayerDataset(),
+ framework::dataset::make("DataType", DataType::QASYMM8),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) }),
+ QuantizedActivationFunctionsDataset,
framework::dataset::make("DataLayout", { DataLayout::NCHW })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-TEST_SUITE_END() // QASYMM8_CustomDataset
+TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(data_precommit, framework::dataset::make("DataType",
- DataType::QASYMM8_SIGNED)),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, -10) })),
- QuantizedActivationFunctionsDataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit, framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ NoActivation,
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(data_precommit, framework::dataset::make("DataType",
- DataType::QASYMM8_SIGNED)),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.1f, -10) })),
- QuantizedActivationFunctionsDataset),
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit, framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, -10) }),
+ QuantizedActivationFunctionsDataset,
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(data_precommit_9x9,
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit,
framework::dataset::make("DataType",
- DataType::QASYMM8_SIGNED)),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })),
- QuantizedActivationFunctionsDataset),
+ DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ NoActivation,
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit,
+ framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.1f, -10) }),
+ QuantizedActivationFunctionsDataset,
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit_9x9,
+ framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ NoActivation,
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall9x9WithActivation, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit_9x9,
+ framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) }),
+ QuantizedActivationFunctionsDataset,
framework::dataset::make("DataLayout", { DataLayout::NCHW })))
{
// Validate output
@@ -585,10 +803,21 @@ FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<int
}
FIXTURE_DATA_TEST_CASE(RunCustomDataset, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(datasets::DirectConvolutionLayerDataset(),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) })),
- QuantizedActivationFunctionsDataset),
+ combine(datasets::DirectConvolutionLayerDataset(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ NoActivation,
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunCustomDatasetWithActivation, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+ combine(datasets::DirectConvolutionLayerDataset(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) }),
+ QuantizedActivationFunctionsDataset,
framework::dataset::make("DataLayout", { DataLayout::NCHW })))
{
// Validate output
diff --git a/tests/validation/CL/ElementwiseMax.cpp b/tests/validation/CL/ElementwiseMax.cpp
index b9444b2795..bd47c23256 100644
--- a/tests/validation/CL/ElementwiseMax.cpp
+++ b/tests/validation/CL/ElementwiseMax.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,7 +58,7 @@ const auto ElementwiseMaxQASYMM8SignedDataset = combine(combine(framework::datas
const auto ElementwiseMaxQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
framework::dataset::make("DataType",
DataType::QSYMM16));
-const auto ElementwiseMaxS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
+const auto ElementwiseMaxS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
framework::dataset::make("DataType", DataType::S16));
const auto ElementwiseMaxFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("DataType", DataType::F16));
@@ -71,6 +71,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
});
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
} // namespace
TEST_SUITE(CL)
@@ -80,21 +82,18 @@ TEST_SUITE(ElementwiseMax)
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false})),
+ framework::dataset::make("Expected", { true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLElementwiseMax::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -107,7 +106,8 @@ using CLElementwiseMaxFixture = ElementwiseMaxValidationFixture<CLTensor, CLAcce
TEST_SUITE(Integer)
TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseMaxU8Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseMaxU8Dataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -115,7 +115,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<uint8_t>, framework::Da
TEST_SUITE_END()
TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMaxS16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxS16Dataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -128,33 +129,36 @@ using CLElementwiseMaxQuantizedFixture = ElementwiseMaxValidationQuantizedFixtur
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseMaxQASYMM8Dataset),
framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
}
TEST_SUITE_END()
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseMaxQASYMM8SignedDataset),
framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
TEST_SUITE_END()
TEST_SUITE(QSYMM16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseMaxQSYMM16Dataset),
framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -167,13 +171,16 @@ using CLElementwiseMaxFloatFixture = ElementwiseMaxValidationFloatFixture<CLTens
TEST_SUITE(Float)
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP16Dataset), EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseMaxFP16Dataset),
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseMaxFP16Dataset),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseMaxFP16Dataset),
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
@@ -181,14 +188,16 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<half>, fr
TEST_SUITE_END()
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP32Dataset),
- EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseMaxFP32Dataset),
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseMaxFP32Dataset),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseMaxFP32Dataset),
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
@@ -197,16 +206,18 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<float>, f
template <typename T>
using CLElementwiseMaxBroadcastFloatFixture = ElementwiseMaxBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLElementwiseMax, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMaxBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMaxBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(),
ElementwiseMaxFP32Dataset),
- EmptyActivationFunctionsDataset))
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseMaxBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseMaxBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
ElementwiseMaxFP32Dataset),
- ActivationFunctionsDataset))
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
diff --git a/tests/validation/CL/ElementwiseMin.cpp b/tests/validation/CL/ElementwiseMin.cpp
index 8f53b241ab..ee229a0941 100644
--- a/tests/validation/CL/ElementwiseMin.cpp
+++ b/tests/validation/CL/ElementwiseMin.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,7 +58,7 @@ const auto ElementwiseMinQASYMM8SignedDataset = combine(combine(framework::datas
const auto ElementwiseMinQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
framework::dataset::make("DataType",
DataType::QSYMM16));
-const auto ElementwiseMinS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
+const auto ElementwiseMinS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
framework::dataset::make("DataType", DataType::S16));
const auto ElementwiseMinFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("DataType", DataType::F16));
@@ -71,6 +71,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
});
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
} // namespace
TEST_SUITE(CL)
@@ -80,21 +82,18 @@ TEST_SUITE(ElementwiseMin)
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false})),
+ framework::dataset::make("Expected", { true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLElementwiseMin::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -107,7 +106,8 @@ using CLElementwiseMinFixture = ElementwiseMinValidationFixture<CLTensor, CLAcce
TEST_SUITE(Integer)
TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseMinU8Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseMinU8Dataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -115,7 +115,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<uint8_t>, framework::Da
TEST_SUITE_END()
TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMinS16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinS16Dataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -128,33 +129,36 @@ using CLElementwiseMinQuantizedFixture = ElementwiseMinValidationQuantizedFixtur
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseMinQASYMM8Dataset),
framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
}
TEST_SUITE_END()
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseMinQASYMM8SignedDataset),
framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
TEST_SUITE_END()
TEST_SUITE(QSYMM16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseMinQSYMM16Dataset),
framework::dataset::make("SrcQInfo0", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
framework::dataset::make("SrcQInfo1", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -167,13 +171,16 @@ using CLElementwiseMinFloatFixture = ElementwiseMinValidationFloatFixture<CLTens
TEST_SUITE(Float)
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP16Dataset), EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseMinFP16Dataset),
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseMinFP16Dataset),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseMinFP16Dataset),
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
@@ -181,14 +188,16 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<half>, fr
TEST_SUITE_END()
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP32Dataset),
- EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseMinFP32Dataset),
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseMinFP32Dataset),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseMinFP32Dataset),
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
@@ -196,16 +205,18 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<float>, f
template <typename T>
using CLElementwiseMinBroadcastFloatFixture = ElementwiseMinBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLElementwiseMin, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMinBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMinBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(),
ElementwiseMinFP32Dataset),
- EmptyActivationFunctionsDataset))
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseMinBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseMinBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
ElementwiseMinFP32Dataset),
- ActivationFunctionsDataset))
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
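The InPlace/OutOfPlace datasets threaded through these elementwise tests are simply one more axis of the cartesian product that combine() builds, so every existing case is replayed with an additional "InPlace" value that FIXTURE_DATA_TEST_CASE forwards to the fixture along with the rest of the tuple. The following standalone sketch (plain C++, not Compute Library code; the shape strings are made up) illustrates the expansion that an extra single-valued dataset produces:

#include <iostream>
#include <string>
#include <vector>

int main()
{
    // Stand-ins for datasets::SmallShapes() and for an "InPlace" dataset with a single
    // value, i.e. what OutOfPlaceDataSet contributes to the combine() chain above.
    const std::vector<std::string> shapes   = { "32x13x2", "48x11x2" };
    const std::vector<bool>        in_place = { false };

    // combine() enumerates the cartesian product; a single-valued axis keeps the case
    // count unchanged while still exposing the new parameter to every test.
    for(const auto &shape : shapes)
    {
        for(bool ip : in_place)
        {
            std::cout << "case: Shape=" << shape << " InPlace=" << std::boolalpha << ip << '\n';
        }
    }
    return 0;
}

An InPlaceDataSet axis of { false, true }, as declared in these files, would instead double the case count and additionally cover the path where the output tensor aliases one of the inputs.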
diff --git a/tests/validation/CL/ElementwisePower.cpp b/tests/validation/CL/ElementwisePower.cpp
index a2d3ba6c09..c2aeb6e045 100644
--- a/tests/validation/CL/ElementwisePower.cpp
+++ b/tests/validation/CL/ElementwisePower.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -57,6 +57,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
});
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
} // namespace
TEST_SUITE(CL)
@@ -96,29 +98,33 @@ using CLElementwisePowerBroadcastFloatFixture = ElementwisePowerBroadcastValidat
TEST_SUITE(Float)
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwisePowerFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwisePowerFP16Dataset),
- EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwisePowerFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwisePowerFP16Dataset),
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwisePowerFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwisePowerFP16Dataset),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwisePowerFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwisePowerFP16Dataset),
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
}
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwisePowerBroadcastFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwisePowerBroadcastFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(),
ElementwisePowerFP16Dataset),
- EmptyActivationFunctionsDataset))
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
ElementwisePowerFP16Dataset),
- ActivationFunctionsDataset))
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
@@ -126,29 +132,33 @@ FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFl
TEST_SUITE_END() //FP16
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwisePowerFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwisePowerFP32Dataset),
- EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwisePowerFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwisePowerFP32Dataset),
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwisePowerFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwisePowerFP32Dataset),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwisePowerFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwisePowerFP32Dataset),
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwisePowerBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwisePowerBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(),
ElementwisePowerFP32Dataset),
- EmptyActivationFunctionsDataset))
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
ElementwisePowerFP32Dataset),
- ActivationFunctionsDataset))
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
diff --git a/tests/validation/CL/ElementwiseSquaredDiff.cpp b/tests/validation/CL/ElementwiseSquaredDiff.cpp
index 0a4ab6627b..ee0279df33 100644
--- a/tests/validation/CL/ElementwiseSquaredDiff.cpp
+++ b/tests/validation/CL/ElementwiseSquaredDiff.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -57,7 +57,7 @@ const auto ElementwiseSquaredDiffQASYMM8Dataset = combine(combine(framework::dat
const auto ElementwiseSquaredDiffQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
framework::dataset::make("DataType",
DataType::QSYMM16));
-const auto ElementwiseSquaredDiffS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
+const auto ElementwiseSquaredDiffS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
framework::dataset::make("DataType", DataType::S16));
const auto ElementwiseSquaredDiffFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("DataType", DataType::F16));
@@ -70,6 +70,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
});
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
} // namespace
TEST_SUITE(CL)
@@ -79,21 +81,18 @@ TEST_SUITE(ElementwiseSquaredDiff)
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false})),
+ framework::dataset::make("Expected", { true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLElementwiseSquaredDiff::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -106,7 +105,8 @@ using CLElementwiseSquaredDiffFixture = ElementwiseSquaredDiffValidationFixture<
TEST_SUITE(Integer)
TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseSquaredDiffU8Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffU8Dataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -114,7 +114,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<uint8_t>, frame
TEST_SUITE_END()
TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseSquaredDiffS16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffS16Dataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -127,22 +128,24 @@ using CLElementwiseSquaredDiffQuantizedFixture = ElementwiseSquaredDiffValidatio
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseSquaredDiffQASYMM8Dataset),
framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
}
TEST_SUITE_END()
TEST_SUITE(QSYMM16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseSquaredDiffQSYMM16Dataset),
framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qsymm16);
@@ -155,14 +158,16 @@ using CLElementwiseSquaredDiffFloatFixture = ElementwiseSquaredDiffValidationFlo
TEST_SUITE(Float)
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP16Dataset),
- EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP16Dataset),
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseSquaredDiffFP16Dataset),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseSquaredDiffFP16Dataset),
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
@@ -170,14 +175,16 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<h
TEST_SUITE_END()
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP32Dataset),
- EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP32Dataset),
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseSquaredDiffFP32Dataset),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseSquaredDiffFP32Dataset),
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
@@ -185,16 +192,18 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<f
template <typename T>
using CLElementwiseSquaredDiffBroadcastFloatFixture = ElementwiseSquaredDiffBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLElementwiseSquaredDiff, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseSquaredDiffBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseSquaredDiffBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(),
ElementwiseSquaredDiffFP32Dataset),
- EmptyActivationFunctionsDataset))
+ EmptyActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseSquaredDiffBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseSquaredDiffBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
ElementwiseSquaredDiffFP32Dataset),
- ActivationFunctionsDataset))
+ ActivationFunctionsDataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32);
diff --git a/tests/validation/CL/FullyConnectedLayer.cpp b/tests/validation/CL/FullyConnectedLayer.cpp
index 09da519c51..2f0c86499b 100644
--- a/tests/validation/CL/FullyConnectedLayer.cpp
+++ b/tests/validation/CL/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,7 @@ namespace test
{
namespace validation
{
+using framework::dataset::make;
namespace
{
/** Tolerance for float operations */
@@ -51,15 +52,20 @@ constexpr float tolerance_num = 0.07f; /**< Tolerance n
/** Tolerance for quantized asymmetric operations */
constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
-const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true }));
+const auto FullyConnectedParameters = combine(make("TransposeWeights", { false, true }), make("ReshapeWeights", { false, true }));
-const auto QuantizationData = framework::dataset::make("QuantizationInfo",
+const auto QuantizationData = make("QuantizationInfo",
{
QuantizationInfo(1.f / 255.f, 10),
QuantizationInfo(1.1f, 10),
});
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+const auto IgnoredQuantizationData = make("IgnoredQuantizationInfo",
+{
+ QuantizationInfo(),
+});
+
+const auto ActivationFunctionsDataset = make("ActivationInfo",
{
ActivationLayerInfo(),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
@@ -68,11 +74,16 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH)
});
-const auto ActivationFunctionsQuantizedDataset = concat(concat(concat(
- framework::dataset::make("ActivationInfo", ActivationLayerInfo()),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f))),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f)));
+// This dataset is only used by the dynamic quantization test cases
+const auto NoActivationFunctionsQuantizedDataset = make("ActivationInfo",
+{
+ ActivationLayerInfo()
+});
+
+const auto ActivationFunctionsQuantizedDataset = concat(concat(
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)),
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f))),
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f)));
} // namespace
TEST_SUITE(CL)
@@ -81,33 +92,33 @@ TEST_SUITE(FullyConnectedLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Mismatching data types
+ make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Mismatching data types
TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Invalid weights dimensions
TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Wrongly reshaped weights
}),
- framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16),
+ make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16),
TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
TensorInfo(TensorShape(217U, 231U), 1, DataType::F32),
TensorInfo(TensorShape(217U, 315U), 1, DataType::F32),
})),
- framework::dataset::make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32),
+ make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32),
TensorInfo(TensorShape(192U), 1, DataType::F32),
TensorInfo(TensorShape(192U), 1, DataType::F32),
TensorInfo(TensorShape(271U), 1, DataType::F32),
TensorInfo(TensorShape(271U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+ make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
})),
- framework::dataset::make("TransposeWeights",{ true, true, false, true, true })),
- framework::dataset::make("ReshapedWeights",{ false, false, false, false, false})),
- framework::dataset::make("Expected", { false, true, true, false, false })),
+ make("TransposeWeights",{ true, true, false, true, true })),
+ make("ReshapedWeights",{ false, false, false, false, false})),
+ make("Expected", { false, true, true, false, false })),
input_info, weights_info, bias_info, output_info, transpose_weights, reshaped_weights, expected)
{
// Create Fully Connected layer info
@@ -131,54 +142,69 @@ template <typename T>
using CLFullyConnectedLayerMixedDataLayoutFixture = FullyConnectedLayerValidationFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T, true>;
template <typename T>
using CLFullyConnectedLayerDynamicWeightsFixture = FullyConnectedWithDynamicWeightsFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>;
+template <typename T>
+using CLFullyConnectedNoBiasFixture = FullyConnectedDynamicNoBiasFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>;
TEST_SUITE(Float)
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F16)),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ FullyConnectedParameters,
+ make("DataType", DataType::F16),
ActivationFunctionsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F16)),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeFullyConnectedLayerDataset(),
+ FullyConnectedParameters,
+ make("DataType", DataType::F16),
ActivationFunctionsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
}
+FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::F16),
+ make("ActivationInfo", ActivationLayerInfo()),
+ make("WeightsReshaped", { false, true })))
+{
+}
TEST_SUITE_END()
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters,
+ make("DataType", DataType::F32),
ActivationFunctionsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(9U, 5U, 7U)),
- framework::dataset::make("Weights", TensorShape(315U, 271U))),
- framework::dataset::make("Biases", TensorShape(271U))),
- framework::dataset::make("Output", TensorShape(271U))),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(
+ make("Input", TensorShape(9U, 5U, 7U)),
+ make("Weights", TensorShape(315U, 271U)),
+ make("Biases", TensorShape(271U)),
+ make("Output", TensorShape(271U)),
+ FullyConnectedParameters,
+ make("DataType", DataType::F32),
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(),
- framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
+FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::F32),
+ make("ActivationInfo", ActivationLayerInfo()),
+ make("WeightsReshaped", { false, true })))
+{
+}
+FIXTURE_DATA_TEST_CASE(RunDynamicNoBias, CLFullyConnectedNoBiasFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::F32),
+ make("ActivationInfo", { ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }),
+ make("WeightsReshaped", { false })))
{
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters,
+ make("DataType", DataType::F32),
ActivationFunctionsDataset))
{
// Validate output
@@ -194,57 +220,132 @@ using CLFullyConnectedLayerQuantizedMixedDataLayoutFixture = FullyConnectedLayer
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData),
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8), QuantizationData,
ActivationFunctionsQuantizedDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(9U, 5U, 7U)),
- framework::dataset::make("Weights", TensorShape(315U, 271U))),
- framework::dataset::make("Biases", TensorShape(271U))),
- framework::dataset::make("Output", TensorShape(271U))),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- QuantizationData),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(
+ make("Input", TensorShape(9U, 5U, 7U)),
+ make("Weights", TensorShape(315U, 271U)),
+ make("Biases", TensorShape(271U)),
+ make("Output", TensorShape(271U)),
+ FullyConnectedParameters,
+ make("DataType", DataType::QASYMM8),
+ QuantizationData,
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData),
+FIXTURE_DATA_TEST_CASE(RunLargeWithActivation, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8), QuantizationData,
ActivationFunctionsQuantizedDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
+
+// Dynamic Quantization Tests
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8), IgnoredQuantizationData,
+ NoActivationFunctionsQuantizedDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8), IgnoredQuantizationData,
+ NoActivationFunctionsQuantizedDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::QASYMM8),
+ NoActivationFunctionsQuantizedDataset,
+ make("WeightsReshaped", { false /* COMPMID-6000: Support FullyConnected with quantized dynamic weights already reshaped */ })))
+{
+}
+
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(
+ make("Input", TensorShape(9U, 5U, 7U)),
+ make("Weights", TensorShape(315U, 271U)),
+ make("Biases", TensorShape(271U)),
+ make("Output", TensorShape(271U)),
+ FullyConnectedParameters,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationData,
+ NoActivationFunctionsQuantizedDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
TEST_SUITE_END() /* QASYMM8 */
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), QuantizationData),
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8_SIGNED), QuantizationData,
ActivationFunctionsQuantizedDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(
+ make("Input", TensorShape(9U, 5U, 7U)),
+ make("Weights", TensorShape(315U, 271U)),
+ make("Biases", TensorShape(271U)),
+ make("Output", TensorShape(271U)),
+ FullyConnectedParameters,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ QuantizationData,
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+// Dynamic Quantization Tests
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8_SIGNED), IgnoredQuantizationData,
+ NoActivationFunctionsQuantizedDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(9U, 5U, 7U)),
- framework::dataset::make("Weights", TensorShape(315U, 271U))),
- framework::dataset::make("Biases", TensorShape(271U))),
- framework::dataset::make("Output", TensorShape(271U))),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- QuantizationData),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
+ combine(
+ make("Input", TensorShape(9U, 5U, 7U)),
+ make("Weights", TensorShape(315U, 271U)),
+ make("Biases", TensorShape(271U)),
+ make("Output", TensorShape(271U)),
+ FullyConnectedParameters,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationData,
+ NoActivationFunctionsQuantizedDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
+
+FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("ActivationInfo", ActivationLayerInfo()),
+ make("WeightsReshaped", { false /* COMPMID-6000: Support FullyConnected with quantized dynamic weights already reshaped */ })))
+{
+}
+FIXTURE_DATA_TEST_CASE(RunDynamicNoBias, CLFullyConnectedNoBiasFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("ActivationInfo", ActivationLayerInfo()),
+ make("WeightsReshaped", { false /* COMPMID-6000: Support FullyConnected with quantized dynamic weights already reshaped */ })))
+{
+}
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // FullyConnectedLayer
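The rewritten FullyConnectedLayer cases above rely on the framework's variadic combine() overload, which removes the deep combine(combine(...)) nesting: passing several datasets in one call is assumed to generate the same cartesian product as left-nesting the two-argument form, which is what the flattened fixtures depend on. A short, illustrative comparison using names from this file (sketch only, requires the usual validation framework headers):

// Nested, pre-patch style: each combine() adds one axis at a time.
const auto nested = combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
                                            FullyConnectedParameters),
                                    make("DataType", DataType::F32)),
                            ActivationFunctionsDataset);

// Flat, post-patch style: one variadic combine() over all axes.
const auto flat = combine(datasets::SmallFullyConnectedLayerDataset(),
                          FullyConnectedParameters,
                          make("DataType", DataType::F32),
                          ActivationFunctionsDataset);

// Both enumerate the same product: shapes x {TransposeWeights, ReshapeWeights} x {F32} x activations.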
diff --git a/tests/validation/CL/GEMM.cpp b/tests/validation/CL/GEMM.cpp
index 838920c29d..16ca14f1d6 100644
--- a/tests/validation/CL/GEMM.cpp
+++ b/tests/validation/CL/GEMM.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -62,6 +62,29 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
TEST_SUITE(CL)
TEST_SUITE(GEMM)
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+ framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32),
+ }),
+ framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(8U, 27U), 1, DataType::S32),
+ TensorInfo(TensorShape(8U, 27U), 1, DataType::F32),
+ })),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(8U, 13U), 1, DataType::S32),
+ TensorInfo(TensorShape(8U, 13U), 1, DataType::F32),
+ })),
+ framework::dataset::make("Expected", { false, true })),
+ lhs_info, rhs_info, output_info, expected)
+{
+ constexpr float alpha = 1.0;
+ constexpr float beta = 0.0;
+ const auto gemm_info = GEMMInfo();
+ bool is_valid = bool(CLGEMM::validate(&lhs_info.clone()->set_is_resizable(true), &rhs_info.clone()->set_is_resizable(true), nullptr, &output_info.clone()->set_is_resizable(true), alpha, beta, gemm_info));
+ ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
template <typename T>
using CLGEMMFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM, T>;
@@ -71,6 +94,9 @@ using CLGEMMOutput3DFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM
template <typename T>
using CLGEMMInputOutput3DFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM, T, false, true, true>;
+template <typename T>
+using CLBatchedMatMulFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM, T, true, false, false, false, false, true>;
+
TEST_SUITE(Float)
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
@@ -181,10 +207,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMOutput3DFixture<half>, framework::Dataset
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
}
TEST_SUITE_END() // FP16
-
TEST_SUITE_END() // Float
TEST_SUITE_END() // OUTPUT_3D
+TEST_SUITE(BATCHED_MATMUL)
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchedMatMulFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(),
+ framework::dataset::make("ReshapeWeights", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchedMatMulFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(),
+ framework::dataset::make("ReshapeWeights", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END()
+TEST_SUITE_END() // BATCHED_MATMUL
+
TEST_SUITE_END() // GEMM
TEST_SUITE_END() // CL
} // namespace validation
diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp
index 5a1971b54c..78d794a9bb 100644
--- a/tests/validation/CL/GEMMLowp.cpp
+++ b/tests/validation/CL/GEMMLowp.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,6 +44,9 @@ namespace test
{
namespace validation
{
+
+using framework::dataset::make;
+
namespace
{
constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
@@ -53,6 +56,7 @@ TEST_SUITE(GEMMLowp)
TEST_SUITE(MatrixMultiplyCore)
using CLGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>;
+using CLGEMMLowpBatchedMatMulFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, true>;
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
{
@@ -66,18 +70,74 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFixture, framework:
validate(CLAccessor(_target), _reference);
}
+using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned =
+ GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>;
+TEST_SUITE(BatchedMatMul)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL,
+ combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(),
+ make("DataType", { DataType::QASYMM8 }),
+ make("reshape_b_only_on_first_run", { false })))
+{
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // QASYMM8
+
+using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned =
+ GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>;
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL,
+ combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(),
+ make("DataType", { DataType::QASYMM8_SIGNED }),
+ make("reshape_b_only_on_first_run", { false })))
+{
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // BatchedMatMul
+
TEST_SUITE(FusedOffsetOutput)
TEST_SUITE(QASYMM8)
using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
- framework::dataset::make("DataType", { DataType::QASYMM8 })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL,
+ combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
+ make("DataType", { DataType::QASYMM8 }),
+ make("reshape_b_only_on_first_run", { true, false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_quant);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
- framework::dataset::make("DataType", { DataType::QASYMM8 })))
+TEST_SUITE(Output3D)
+using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture =
+ GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, true>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture, framework::DatasetMode::ALL,
+ combine(datasets::SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset(),
+ make("DataType", { DataType::QASYMM8 }),
+ make("reshape_b_only_on_first_run", { true, false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // Output3D
+
+TEST_SUITE(InputOutput3D)
+using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture =
+ GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, true, true>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture, framework::DatasetMode::ALL,
+ combine(datasets::SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset(),
+ make("DataType", { DataType::QASYMM8 }),
+ make("reshape_b_only_on_first_run", { true, false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // InputOutput3D
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
+ make("DataType", { DataType::QASYMM8 }),
+ make("reshape_b_only_on_first_run", { true, false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_quant);
@@ -86,8 +146,10 @@ TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture =
GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(),
- framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL,
+ combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(),
+ make("DataType", { DataType::QASYMM8_SIGNED }),
+ make("reshape_b_only_on_first_run", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_quant);
@@ -130,24 +192,24 @@ TEST_SUITE(QuantizeDownInt32Scale)
TEST_SUITE(QASYMM8)
-const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
- 3)
- * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_uint8_scale_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) * make("result_shift", 2, 3)
+ * make("min", 0) * make("max", 255) * make("addBias", { false, true });
-const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
- 2)
- * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 173) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_uint8_scale_relu_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2)
+ * make("result_shift", 2, 3) * make("min", 0, 2) * make("max", 171, 173) * make("addBias", { false, true });
using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL,
+ combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL,
+ combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -158,24 +220,24 @@ TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
-const auto quantize_down_int32_to_int8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
- 3)
- * framework::dataset::make("min", -128) * framework::dataset::make("max", 127) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_int8_scale_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) * make("result_shift", 2, 3)
+ * make("min", -128) * make("max", 127) * make("addBias", { false, true });
-const auto quantize_down_int32_to_int8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
- 2)
- * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", -100, -98) * framework::dataset::make("max", 71, 73) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_int8_scale_relu_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2)
+ * make("result_shift", 2, 3) * make("min", -100, -98) * make("max", 71, 73) * make("addBias", { false, true });
using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL,
+ combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL,
+ combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases))
{
// Validate output
validate(CLAccessor(_target), _reference);
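
Note: the QuantizeDownInt32Scale cases above sweep result_offset, result_mult_int, result_shift, the clamping bounds and an optional bias. To a first approximation the output stage computes clamp(((acc + bias + result_offset) * result_mult_int) >> result_shift, min, max); the sketch below illustrates that arithmetic only, and the rounding/saturation details of the real kernel may differ.

    #include <algorithm>
    #include <cstdint>

    // Illustrative integer "scale" output stage: add offset (and optional bias),
    // multiply by an integer, shift right, then clamp to [min, max].
    uint8_t quantize_down_int32_scale(int32_t acc, int32_t bias, bool add_bias,
                                      int32_t result_offset, int32_t result_mult_int,
                                      int32_t result_shift, int32_t min, int32_t max)
    {
        int32_t v = acc + (add_bias ? bias : 0) + result_offset;
        v *= result_mult_int;
        v >>= result_shift;
        v = std::max(min, std::min(max, v));
        return static_cast<uint8_t>(v);
    }
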
@@ -185,140 +247,6 @@ TEST_SUITE_END() // BoundedReLu
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // QuantizeDownInt32Scale
-TEST_SUITE(QuantizeDownInt32ScaleByFixedPoint)
-
-TEST_SUITE(QASYMM8)
-
-const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
- 2)
- * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
- 2)
- * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 174) * framework::dataset::make("addBias", { false, true });
-using CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture =
- GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_uint8_scale_by_fixedpoint_cases))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(),
- quantize_down_int32_to_uint8_scale_by_fixedpoint_cases))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(),
- quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // BoundedReLu
-TEST_SUITE_END() // QASYMM8
-TEST_SUITE(QASYMM8_SIGNED)
-const auto quantize_down_int32_to_int8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2)
- * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128) * framework::dataset::make("max", 127) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2)
- * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128, -126) * framework::dataset::make("max", 110, 112) * framework::dataset::make("addBias", { false, true });
-using CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture =
- GEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_int8_scale_by_fixedpoint_cases))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE_END() // BoundedReLu
-TEST_SUITE_END() // QASYMM8_SIGNED
-TEST_SUITE(QSYMM16)
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
- 2)
- * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
- 2)
- * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823,
- 1073741825)
- * framework::dataset::make("result_shift", -3,
- -2)
- * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600,
- 254601602)
- * framework::dataset::make("result_shift", -3,
- -1)
- * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
-
-using CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture =
- GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint>;
-
-TEST_SUITE(NoRelu)
-TEST_SUITE(MultSmallerEq1)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_int16_scale_by_fixedpoint_cases))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // MultSmallerEq1
-TEST_SUITE(MultGreater1)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // MultGreater1
-TEST_SUITE_END() // NoRelu
-TEST_SUITE(BoundedReLu)
-TEST_SUITE(MultSmallerEq1)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // MultSmallerEq1
-TEST_SUITE(MultGreater1)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // MultGreater1
-TEST_SUITE_END() // BoundedReLu
-TEST_SUITE_END() // QSYMM16
-TEST_SUITE_END() // QuantizeDownInt32ScaleByFixedPoint
-
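
Note: the QuantizeDownInt32ScaleByFixedPoint suite removed above targeted the fixed-point output stage, parameterised by result_fixedpoint_multiplier, result_shift, result_offset_after_shift and the clamping bounds (result_shift may be negative in the MultGreater1 cases). A rough, gemmlowp-style sketch of that arithmetic for the uint8 variant follows; the helper names are illustrative and saturation corner cases are omitted.

    #include <algorithm>
    #include <cstdint>

    // Rounding doubling high multiply of two Q31 fixed-point values.
    int32_t rounding_doubling_high_mul(int32_t a, int32_t b)
    {
        const int64_t prod  = static_cast<int64_t>(a) * static_cast<int64_t>(b);
        const int64_t nudge = prod >= 0 ? (1ll << 30) : 1 - (1ll << 30);
        return static_cast<int32_t>((prod + nudge) >> 31);
    }

    int32_t rounding_shift_right(int32_t x, int32_t shift)
    {
        if(shift <= 0)
        {
            // A negative result_shift (MultGreater1 cases) becomes a left shift.
            return static_cast<int32_t>(static_cast<int64_t>(x) * (int64_t{ 1 } << (-shift)));
        }
        const int32_t half = 1 << (shift - 1);
        return (x + half) >> shift;
    }

    uint8_t quantize_down_fixedpoint(int32_t acc, int32_t multiplier, int32_t shift,
                                     int32_t offset_after_shift, int32_t min, int32_t max)
    {
        int32_t v = rounding_shift_right(rounding_doubling_high_mul(acc, multiplier), shift);
        v += offset_after_shift;
        v = std::max(min, std::min(max, v));
        return static_cast<uint8_t>(v);
    }
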
TEST_SUITE(QuantizeDownInt32ScaleByFloat)
TEST_SUITE(QASYMM8)
@@ -326,13 +254,14 @@ using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture =
GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage, uint8_t>;
FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8),
- datasets::TinyShapes()),
- framework::dataset::make("result_real_multiplier", 0.33f)),
- framework::dataset::make("result_offset", 2, 3)),
- framework::dataset::make("min", 0)),
- framework::dataset::make("max", 255)),
- framework::dataset::make("addBias", { false, true })))
+ combine(
+ make("DataType", DataType::QASYMM8),
+ datasets::TinyShapes(),
+ make("result_real_multiplier", 0.33f),
+ make("result_offset", 2, 3),
+ make("min", 0),
+ make("max", 255),
+ make("addBias", { false, true })))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -343,13 +272,14 @@ TEST_SUITE(QASYMM8_SIGNED)
using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed =
GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage, int8_t>;
FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED),
- datasets::TinyShapes()),
- framework::dataset::make("result_real_multiplier", 0.33f)),
- framework::dataset::make("result_offset", 2, 3)),
- framework::dataset::make("min", -128)),
- framework::dataset::make("max", 127)),
- framework::dataset::make("addBias", { false, true })))
+ combine(
+ make("DataType", DataType::QASYMM8_SIGNED),
+ datasets::TinyShapes(),
+ make("result_real_multiplier", 0.33f),
+ make("result_offset", 2, 3),
+ make("min", -128),
+ make("max", 127),
+ make("addBias", { false, true })))
{
// Validate output
validate(CLAccessor(_target), _reference);
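
Note: the QuantizeDownInt32ScaleByFloat cases scale the int32 accumulator by a real multiplier (0.33f here), add result_offset and clamp to [min, max]. A minimal sketch of that arithmetic, assuming round-to-nearest; the OpenCL kernel's rounding mode may differ, and the QASYMM8 variant casts to uint8_t instead of int8_t.

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Illustrative float-scale output stage for the signed (QASYMM8_SIGNED) case.
    int8_t quantize_down_float(int32_t acc, float result_real_multiplier,
                               int32_t result_offset, int32_t min, int32_t max)
    {
        int32_t v = static_cast<int32_t>(std::lround(acc * result_real_multiplier));
        v += result_offset;
        v = std::max(min, std::min(max, v));
        return static_cast<int8_t>(v);
    }
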
@@ -363,4 +293,4 @@ TEST_SUITE_END() // GEMMLowp
TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
+} // namespace arm_compute
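
Note on the dataset changes throughout this file: the nested combine(combine(...), ...) chains and the fully qualified framework::dataset::make(...) calls are collapsed into a single variadic combine(...) with an unqualified make(...). Either spelling builds the cartesian product of the named value lists, so every combination of shapes, data types and flags becomes one test case. A toy, self-contained illustration of that product — not the ACL framework API:

    #include <iostream>
    #include <string>
    #include <vector>

    // Each field is one dataset "axis"; combining axes multiplies their sizes.
    struct Case
    {
        int         m;
        std::string data_type;
        bool        reshape_b_only_on_first_run;
    };

    std::vector<Case> combine(const std::vector<int> &ms,
                              const std::vector<std::string> &types,
                              const std::vector<bool> &reshape_flags)
    {
        std::vector<Case> cases;
        for(int m : ms)
            for(const auto &t : types)
                for(bool r : reshape_flags)
                    cases.push_back({ m, t, r });
        return cases;
    }

    int main()
    {
        const auto cases = combine({ 16, 37 }, { "QASYMM8" }, { true, false });
        std::cout << cases.size() << " test cases generated\n"; // prints 4
        return 0;
    }
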
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp
index 1057af95f2..d0d06a8ddb 100644
--- a/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,7 @@
*/
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/framework/Asserts.h"
@@ -41,7 +41,7 @@ namespace validation
using namespace arm_compute::misc::shape_calculator;
// Create function for CLGEMMMatrixMultiplyNativeKernel
-using CLGEMMLowpMatrixMultiplyNative = CLSynthetizeFunction<CLGEMMLowpMatrixMultiplyNativeKernel>;
+using CLGEMMLowpMatrixMultiplyNative = CLSynthetizeOperator<opencl::kernels::ClGemmLowpMatrixMultiplyNativeKernel>;
// Fixture for CLGEMMLowpMatrixMultiplyNative
using CLGEMMLowpMatrixMultiplyNativeFixture = GEMMLowpMatrixMultiplyNativeValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyNative>;
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp
index 68a7d055ad..88455bdeb8 100644
--- a/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp
@@ -23,9 +23,9 @@
*/
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/framework/Asserts.h"
@@ -49,7 +49,7 @@ using CLGEMMReshapeLHSMatrix = CLSynthetizeOperator<opencl::kernels::ClGemmResha
using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<opencl::kernels::ClGemmReshapeRhsMatrixKernel>;
// Create function for CLGEMMLowpMatrixMultiplyReshapedKernel
-using CLGEMMLowpMatrixMultiplyReshaped = CLSynthetizeFunction<CLGEMMLowpMatrixMultiplyReshapedKernel>;
+using CLGEMMLowpMatrixMultiplyReshaped = CLSynthetizeOperator<opencl::kernels::ClGemmLowpMatrixMultiplyReshapedKernel>;
// Fixture for CLGEMMLowpMatrixMultiplyReshaped
using CLGEMMLowpMatrixMultiplyReshapedFixture = GEMMLowpMatrixMultiplyReshapedValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshaped>;
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp
index 43b86b51e8..c56901effc 100644
--- a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp
@@ -25,8 +25,8 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
@@ -49,7 +49,7 @@ using namespace arm_compute::misc::shape_calculator;
using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<opencl::kernels::ClGemmReshapeRhsMatrixKernel>;
// Create function for CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
-using CLGEMMLowpMatrixMultiplyReshapedOnlyRHS = CLSynthetizeFunction<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>;
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHS = CLSynthetizeOperator<opencl::kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel>;
// Fixture for CLGEMMLowpMatrixMultiplyReshapedOnlyRHS
using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFixture = GEMMLowpMatrixMultiplyReshapedOnlyRHSValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS>;
@@ -157,7 +157,7 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned
// Create and configure function
CLGEMMLowpMatrixMultiplyReshapedOnlyRHS gemm;
- gemm.configure(&lhs, &rhs_reshaped, &dst, gemm_info);
+ gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info);
}
} // namespace
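
Note: replacing CLSynthetizeFunction with CLSynthetizeOperator follows the kernels' move from tensor-bound functions to operator-style kernels, which are configured from tensor descriptors and receive the actual tensors only at run time — hence gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info) above. A reduced sketch of the two styles, using made-up types rather than the real ACL interfaces:

    #include <cassert>

    // Hypothetical, simplified types; the real interfaces carry far more state.
    struct TensorInfo { int dims[4]; };
    struct Tensor     { TensorInfo info_; TensorInfo *info() { return &info_; } float *data{}; };

    // Function style: bound to concrete tensors at configure time.
    struct FunctionStyleGemm
    {
        void configure(Tensor *lhs, Tensor *rhs, Tensor *dst) { lhs_ = lhs; rhs_ = rhs; dst_ = dst; }
        void run() { assert(lhs_ && rhs_ && dst_); /* launch kernel on the stored tensors */ }
        Tensor *lhs_{}, *rhs_{}, *dst_{};
    };

    // Operator style: configured from metadata only, tensors supplied per run.
    struct OperatorStyleGemm
    {
        void configure(const TensorInfo *lhs, const TensorInfo *rhs, const TensorInfo *dst)
        {
            assert(lhs && rhs && dst); // static shape/type validation happens here
        }
        void run(Tensor *lhs, Tensor *rhs, Tensor *dst)
        {
            assert(lhs && rhs && dst); /* launch kernel on the tensors passed in */
        }
    };
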
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL.cpp
new file mode 100644
index 0000000000..a0d13c3e39
--- /dev/null
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL.cpp
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLCast.h"
+#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
+#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsMMULKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/CL/Helper.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/fixtures/GEMMLowpFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using namespace arm_compute::opencl::kernels;
+
+// Create function for CLGEMMReshapeRHSMatrixKernel
+using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<opencl::kernels::ClGemmReshapeRhsMatrixKernel>;
+
+// Create function for CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHS = CLSynthetizeOperator<opencl::kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsMMULKernel>;
+
+// Fixture for CLGEMMLowpMatrixMultiplyReshapedOnlyRHS
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULFixture =
+ GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS>;
+
+// Fixture for CLGEMMLowpMatrixMultiplyReshapedOnlyRHS
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageFixtureSigned =
+ GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageValidationFixture<int8_t, CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS, CLReductionOperation, CLCast>;
+
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageFixtureUnsigned =
+ GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageValidationFixture<uint8_t, CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS, CLReductionOperation, CLCast>;
+
+namespace
+{
+// *INDENT-OFF*
+// clang-format off
+
+/** M values to test */
+const auto m_values = framework::dataset::make("M", {16, 49});
+
+/** N values to test */
+const auto n_values = framework::dataset::make("N", {16, 259});
+
+/** K values to test */
+const auto k_values = framework::dataset::make("K", {192});
+
+/** Batch size values to test */
+const auto b_values = framework::dataset::make("batch_size", {1, 2});
+
+/** M0 values to test - Precommit */
+const auto m0 = framework::dataset::make("M0", {1, 2, 4});
+
+/** N0 values to test - Precommit */
+const auto n0 = framework::dataset::make("N0", { 1, 4, 8});
+
+/** K0 values to test - Precommit */
+const auto k0 = framework::dataset::make("K0", { 4 });
+
+/** H0 values to test - Precommit */
+const auto h0 = framework::dataset::make("H0", 1);
+
+/** Interleave values to test with RHS matrix */
+const auto i_values_rhs = framework::dataset::make("interleave_rhs", { false });
+
+/** Transpose values to test with RHS matrix */
+const auto t_values_rhs = framework::dataset::make("transpose_rhs", { true });
+
+const auto broadcast_bias = framework::dataset::make("broadcast_bias", {true, false});
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL)
+FIXTURE_DATA_TEST_CASE(Signed, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULFixture, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ m_values,
+ n_values),
+ k_values),
+ b_values),
+ m0),
+ n0),
+ k0),
+ h0),
+ i_values_rhs),
+ t_values_rhs),
+ framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })))
+{
+ // Validate output
+ if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+ {
+ validate(CLAccessor(_target), _reference);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+FIXTURE_DATA_TEST_CASE(Unsigned, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULFixture, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ m_values,
+ n_values),
+ k_values),
+ b_values),
+ m0),
+ n0),
+ k0),
+ h0),
+ i_values_rhs),
+ t_values_rhs),
+ framework::dataset::make("DataType", { DataType::QASYMM8})))
+{
+ // Validate output
+ if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+ {
+ validate(CLAccessor(_target), _reference);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+FIXTURE_DATA_TEST_CASE(OutputStageSigned, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageFixtureSigned, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ m_values,
+ n_values),
+ k_values),
+ b_values),
+ m0),
+ n0),
+ k0),
+ h0),
+ i_values_rhs),
+ t_values_rhs),
+ broadcast_bias),
+ framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED})))
+{
+ // Validate output
+ if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+ {
+ validate(CLAccessor(_target), _reference);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+FIXTURE_DATA_TEST_CASE(OutputStageUnsigned, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageFixtureUnsigned, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ m_values,
+ n_values),
+ k_values),
+ b_values),
+ m0),
+ n0),
+ k0),
+ h0),
+ i_values_rhs),
+ t_values_rhs),
+ broadcast_bias),
+ framework::dataset::make("DataType", { DataType::QASYMM8})))
+{
+ // Validate output
+ if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+ {
+ validate(CLAccessor(_target), _reference);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+TEST_SUITE_END() // GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
\ No newline at end of file
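
Note: every MMUL test above guards validation behind arm_matrix_multiply_supported(...), skipping rather than failing on devices without the cl_arm_matrix_multiply OpenCL extension. A standalone sketch of an equivalent extension query — an assumption about what such a helper does, not its actual implementation:

    #include <CL/cl.h>
    #include <string>
    #include <vector>

    // Return true if the device's extension string contains the given extension.
    // Minimal sketch; failures are simply treated as "unsupported".
    bool device_has_extension(cl_device_id device, const std::string &ext)
    {
        size_t size = 0;
        if(clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, nullptr, &size) != CL_SUCCESS)
        {
            return false;
        }
        std::vector<char> buf(size);
        if(clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, buf.data(), nullptr) != CL_SUCCESS)
        {
            return false;
        }
        return std::string(buf.data(), size).find(ext) != std::string::npos;
    }

    // Usage: run MMUL validation only when
    // device_has_extension(dev, "cl_arm_matrix_multiply") returns true.
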
diff --git a/tests/validation/CL/GEMMMatrixMultiply.cpp b/tests/validation/CL/GEMMMatrixMultiply.cpp
deleted file mode 100644
index 21e085087d..0000000000
--- a/tests/validation/CL/GEMMMatrixMultiply.cpp
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Copyright (c) 2019-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/Helper.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/GEMMFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-using namespace arm_compute::misc::shape_calculator;
-using namespace arm_compute::opencl::kernels;
-
-// Create function for CLGEMMMatrixMultiplyKernel
-using CLGEMMMatrixMultiplyNative = CLSynthetizeOperator<ClGemmMatrixMultiplyKernel>;
-
-// Fixture for GEMMMatrixMultiplyValidationFixture
-template <typename T>
-using CLGEMMMatrixMultiplyNativeFixture = GEMMMatrixMultiplyValidationFixture<CLTensor, CLAccessor, T, CLGEMMMatrixMultiplyNative>;
-
-// Fixture for GEMMMatrixMultiply3DValidationFixture
-template <typename T>
-using CLGEMMMatrixMultiplyNative3DFixture = GEMMMatrixMultiply3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMMatrixMultiplyNative>;
-
-namespace
-{
-// *INDENT-OFF*
-// clang-format off
-RelativeTolerance<float> rel_tolerance_f32(0.001f);
-constexpr float abs_tolerance_f32(0.0001f);
-
-RelativeTolerance<half> rel_tolerance_f16(half(0.2));
-constexpr float tolerance_num_f16 = 0.02f;
-
-/** Alpha values to test */
-const auto alpha_values = framework::dataset::make("alpha", {1.0f, -0.75f} );
-
-/** Beta values to test */
-const auto beta_values = framework::dataset::make("beta", {-0.35f, 0.0f} );
-
-/** M, N combinations to test
- * 1: Special 1x1 case
- * 2: Special multples of processor size in both dimensions
- * 3: Non multiples of processor size in both dimensions
- * 4: Special 1x1003 case
-*/
-const auto m_n_values = zip(
- framework::dataset::make("M", {1, 16, 37, 1}),
- framework::dataset::make("N", {1, 16, 51, 1003})
- );
-
-/** N values to test */
-const auto n_values = framework::dataset::make("N", {51, 1003});
-
-/** K values to test */
-const auto k_values = framework::dataset::make("K", 23);
-
-/** M_W values to test */
-const auto m_w_values = framework::dataset::make("M_W", 5);
-
-/** M_H values to test */
-const auto m_h_values = framework::dataset::make("M_H", 7);
-
-/** Batch size values to test */
-const auto b_values = framework::dataset::make("batch_size", 1, 3);
-
-/** Activation values to test */
-const auto act_values = framework::dataset::make("Activation",
-{
- ActivationLayerInfo(),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
-});
-
-/** Broadcast bias from vector to matrix */
-const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );
-
-/** GPU architectures values to test */
-const auto gpu_arch_values = framework::dataset::make("GPUArch",
-{
- GPUTarget::MIDGARD,
- GPUTarget::BIFROST
-});
-
-/** Data types values to test in the configuration */
-const auto data_type_values = framework::dataset::make("DataType",
-{
- DataType::F32,
- DataType::F16
-});
-
-/** M values to test */
-const auto fp16_mixed_precision_values = framework::dataset::make("fp16_mixed_precision", {true, false});
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(GEMMMatrixMultiply)
-TEST_CASE(Negative, framework::DatasetMode::ALL)
-{
- // Unsupported QASYMM8 data type
- {
- const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::QASYMM8);
- const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::QASYMM8);
- const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::QASYMM8);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = false;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-
- // Unsupported SIZE_T data type
- {
- const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::SIZET);
- const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::SIZET);
- const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::SIZET);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = false;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-
- // Mixed precision with F32
- {
- const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::F32);
- const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F32);
- const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = false;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const bool fp_mixed_precision = true;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-
- // Max number of dimensions LHS matrix
- {
- const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U, 4U), 1, DataType::F32);
- const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F32);
- const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = false;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-
- // Max number of dimensions RHS matrix
- {
- const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 4U), 1, DataType::F32);
- const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 4U), 1, DataType::F32);
- const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 4U), 1, DataType::F32);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = false;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-
- // Broadcast bias
- {
- const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::F16);
- const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F16);
- // The correct shape should be bias = TensorInfo(TensorShape(14U, 1U, 1U, 1U), 1, DataType::F32);
- const auto bias = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F16);
- const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F16);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = false;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, true);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const bool fp_mixed_precision = false;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-
- // Invalid dimensions for the bias
- {
- const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::F32);
- const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F32);
- // The correct shape should be bias = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32);
- const auto bias = TensorInfo(TensorShape(14U, 8U, 1U, 1U), 1, DataType::F32);
- const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = false;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const bool fp_mixed_precision = false;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-
- // Invalid dimensions for the output
- {
- const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::F32);
- const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F32);
- // The correct shape should be out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32);
- const auto out = TensorInfo(TensorShape(14U, 7U, 1U, 1U), 1, DataType::F32);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = false;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-}
-
-TEST_SUITE(Float)
-TEST_SUITE(FP32)
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(
- m_n_values,
- k_values),
- b_values),
- alpha_values),
- beta_values),
- broadcast_bias_values),
- framework::dataset::make("fp16_mixed_precision", false)),
- act_values),
- framework::dataset::make("DataType", DataType::F32)),
- gpu_arch_values))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyNative3DFixture<float>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- m_w_values,
- m_h_values),
- n_values),
- k_values),
- b_values),
- alpha_values),
- beta_values),
- broadcast_bias_values),
- framework::dataset::make("fp16_mixed_precision", false)),
- act_values),
- framework::dataset::make("DataType", DataType::F32)),
- gpu_arch_values))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
-}
-
-TEST_SUITE_END() // FP32
-
-TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyNativeFixture<half>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(
- m_n_values,
- k_values),
- b_values),
- alpha_values),
- beta_values),
- broadcast_bias_values),
- fp16_mixed_precision_values),
- act_values),
- framework::dataset::make("DataType", DataType::F16)),
- gpu_arch_values))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyNative3DFixture<half>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- m_w_values,
- m_h_values),
- n_values),
- k_values),
- b_values),
- alpha_values),
- beta_values),
- broadcast_bias_values),
- fp16_mixed_precision_values),
- act_values),
- framework::dataset::make("DataType", DataType::F16)),
- gpu_arch_values))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16);
-}
-
-TEST_SUITE_END() // FP16
-TEST_SUITE_END() // Float
-TEST_SUITE_END() // GEMMMatrixMuliplty
-TEST_SUITE_END() // CL
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
\ No newline at end of file
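
Note: the Negative case of the deleted file documents the validate() pattern used across these kernels: purely static checks over TensorInfo descriptors return a status object that converts to false for unsupported configurations, and the test asserts rejection without ever running a kernel. A reduced sketch of that pattern with hypothetical types:

    #include <string>

    // Hypothetical, reduced stand-ins for the status/validate pattern.
    struct Status
    {
        bool        ok{true};
        std::string error{};
        explicit operator bool() const { return ok; }
    };

    enum class DataType { F16, F32, QASYMM8 };
    struct TensorInfo { int w; int h; DataType dt; };

    // Example static checks: matching floating-point data types, compatible
    // inner dimensions, and an output of shape (rhs.w x lhs.h).
    Status validate_gemm(const TensorInfo &lhs, const TensorInfo &rhs, const TensorInfo &dst)
    {
        if(lhs.dt != rhs.dt || lhs.dt != dst.dt) return { false, "data type mismatch" };
        if(lhs.dt == DataType::QASYMM8)          return { false, "unsupported data type" };
        if(lhs.w != rhs.h)                       return { false, "inner dimensions differ" };
        if(dst.w != rhs.w || dst.h != lhs.h)     return { false, "wrong output shape" };
        return {};
    }

    // A negative test then asserts: bool(validate_gemm(bad_lhs, rhs, dst)) == false.
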
diff --git a/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp b/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp
deleted file mode 100644
index e47518ad7d..0000000000
--- a/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp
+++ /dev/null
@@ -1,334 +0,0 @@
-/*
- * Copyright (c) 2019-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/Helper.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/GEMMFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-using namespace arm_compute::misc::shape_calculator;
-using namespace arm_compute::opencl::kernels;
-
-// Create function for ClGemmReshapeLhsMatrixKernel
-using CLGEMMReshapeLHSMatrix = CLSynthetizeOperator<ClGemmReshapeLhsMatrixKernel>;
-
-// Create function for ClGemmReshapeRhsMatrixKernel
-using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<ClGemmReshapeRhsMatrixKernel>;
-
-// Create function for ClGemmMatrixMultiplyKernel
-using CLGEMMMatrixMultiplyReshaped = CLSynthetizeOperator<ClGemmMatrixMultiplyKernel>;
-
-// Fixture for GEMMMatrixMultiplyInterleavedTransposedValidationFixture
-template <typename T>
-using CLGEMMMatrixMultiplyReshapedFixture =
- GEMMMatrixMultiplyInterleavedTransposedValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshaped>;
-
-// Fixture for GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture
-template <typename T>
-using CLGEMMMatrixMultiplyReshaped3DFixture =
- GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshaped>;
-
-namespace
-{
-// *INDENT-OFF*
-// clang-format off
-RelativeTolerance<float> rel_tolerance_f32(0.001f);
-constexpr float abs_tolerance_f32(0.0001f);
-
-RelativeTolerance<half> rel_tolerance_f16(half(0.2));
-constexpr float tolerance_num_f16 = 0.02f;
-
-/** Alpha values to test */
-const auto alpha_values = framework::dataset::make("alpha", {1.0f, -0.75f} );
-
-/** Beta values to test */
-const auto beta_values = framework::dataset::make("beta", {-0.35f, 0.0f} );
-
-/** M, N combinations to test
- * 1: Special 1x1 case
- * 2: Special multples of processor size in both dimensions
- * 3: Non multiples of processor size in both dimensions
-*/
-const auto m_n_values = zip(
- framework::dataset::make("M", {1, 16, 37}),
- framework::dataset::make("N", {1, 16, 51})
- );
-
-/** N values to test */
-const auto n_values = framework::dataset::make("N", 51);
-
-/** K values to test */
-const auto k_values = framework::dataset::make("K", 23);
-
-/** M_W values to test */
-const auto m_w_values = framework::dataset::make("M_W", 5);
-
-/** M_H values to test */
-const auto m_h_values = framework::dataset::make("M_H", 7);
-
-/** Batch size values to test */
-const auto b_values = framework::dataset::make("batch_size", 1, 3);
-
-/** Activation values to test */
-const auto act_values = framework::dataset::make("Activation",
-{
- ActivationLayerInfo(),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
-});
-
-/** V0 values to test */
-const auto v0_values = framework::dataset::make("V0", 2);
-
-/** H0 values to test */
-const auto h0_values = framework::dataset::make("H0", 4);
-
-/** Broadcast bias from vector to matrix */
-const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", {false, true} );
-
-/** GPU architectures values to test */
-const auto gpu_arch_values = framework::dataset::make("GPUArch",
-{
- GPUTarget::MIDGARD,
- GPUTarget::BIFROST
-});
-
-/** Data types values to test in the configuration */
-const auto data_type_values = framework::dataset::make("DataType",
-{
- DataType::F32,
- DataType::F16
-});
-
-/** M values to test */
-const auto fp16_mixed_precision_values = framework::dataset::make("fp16_mixed_precision", {true, false});
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(GEMMMatrixMultiplyInterleavedTransposed)
-TEST_CASE(Negative, framework::DatasetMode::ALL)
-{
- // The following tests are already integrated in the GEMMMatrixMultiply validation because
- // in common with this validation
- // - Unsupported QASYMM8 data type
- // - Unsupported SIZE_T data type
- // - Mixed precision with F32
- // - Max number of dimensions LHS matrix
- // - Max number of dimensions RHS matrix
-
- // Invalid LHS dimensions
- {
- // The correct shape should be: lhs = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32);
- const auto lhs = TensorInfo(TensorShape(256U, 2U, 1U, 1U), 1, DataType::F32);
- const auto rhs = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32);
- const auto bias = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
- const auto out = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = true;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, false);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const bool fp_mixed_precision = false;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-
- // Invalid RHS dimensions
- {
- const auto lhs = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32);
- // The correct shape should be rhs = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32);
- const auto rhs = TensorInfo(TensorShape(104U, 4U, 1U, 1U), 1, DataType::F32);
- const auto bias = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
- const auto out = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = true;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, false);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const bool fp_mixed_precision = false;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-
- // Broadcast bias
- {
- const auto lhs = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32);
- const auto rhs = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32);
- // The correct shape should be bias = TensorInfo(TensorShape(24U, 1U, 1U, 1U), 1, DataType::F32);
- const auto bias = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
- const auto out = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = true;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, true);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const bool fp_mixed_precision = false;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-
- // Invalid dimensions for the bias
- {
- const auto lhs = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32);
- const auto rhs = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32);
- // The correct shape should be bias = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
- const auto bias = TensorInfo(TensorShape(25U, 16U, 1U, 1U), 1, DataType::F32);
- const auto out = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = true;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, false);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const bool fp_mixed_precision = false;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-
- // Invalid dimensions for the output
- {
- const auto lhs = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32);
- const auto rhs = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32);
- const auto bias = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
- // The correct shape should be out = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
- const auto out = TensorInfo(TensorShape(24U, 13U, 1U, 1U), 1, DataType::F32);
- constexpr float alpha = 1.3f;
- constexpr float beta = 0.7f;
- const bool is_interleaved_transposed = true;
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, false);
- const GPUTarget gpu_target = GPUTarget::MIDGARD;
- const bool fp_mixed_precision = false;
- const auto status = ClGemmMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
- ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
- }
-}
-
-TEST_SUITE(Float)
-TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- m_n_values,
- k_values),
- b_values),
- alpha_values),
- beta_values),
- v0_values),
- h0_values),
- broadcast_bias_values),
- framework::dataset::make("fp16_mixed_precision", false)),
- act_values),
- framework::dataset::make("DataType", DataType::F32)),
- gpu_arch_values))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- m_w_values,
- m_h_values),
- n_values),
- k_values),
- b_values),
- alpha_values),
- beta_values),
- v0_values),
- h0_values),
- broadcast_bias_values),
- framework::dataset::make("fp16_mixed_precision", false)),
- act_values),
- framework::dataset::make("DataType", DataType::F32)),
- gpu_arch_values))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
-}
-
-TEST_SUITE_END() // FP32
-
-TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- m_n_values,
- k_values),
- b_values),
- alpha_values),
- beta_values),
- v0_values),
- h0_values),
- broadcast_bias_values),
- fp16_mixed_precision_values),
- act_values),
- framework::dataset::make("DataType", DataType::F16)),
- gpu_arch_values))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- m_w_values,
- m_h_values),
- n_values),
- k_values),
- b_values),
- alpha_values),
- beta_values),
- v0_values),
- h0_values),
- broadcast_bias_values),
- fp16_mixed_precision_values),
- act_values),
- framework::dataset::make("DataType", DataType::F16)),
- gpu_arch_values))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16);
-}
-
-TEST_SUITE_END() // FP16
-TEST_SUITE_END() // Float
-TEST_SUITE_END() // GEMMMatrixMulipltyInterleavedTransposed
-TEST_SUITE_END() // CL
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
\ No newline at end of file
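
Note: the interleaved/transposed path deleted here reshaped the LHS by interleaving blocks of V0 rows (and the RHS by transposing blocks of H0 columns) before the multiply, so the kernel could read both operands contiguously. A conceptual, standalone sketch of the row-interleave idea; the exact block layout produced by ClGemmReshapeLhsMatrixKernel differs.

    #include <vector>

    // Interleave rows of an M x K row-major matrix in groups of v0: rows
    // r0 .. r0+v0-1 are emitted element-by-element in column order, so a kernel
    // processing v0 rows at a time reads its LHS operand contiguously.
    std::vector<float> interleave_lhs(const std::vector<float> &src, int m, int k, int v0)
    {
        std::vector<float> dst;
        dst.reserve(src.size());
        for(int r0 = 0; r0 < m; r0 += v0)
        {
            for(int c = 0; c < k; ++c)
            {
                for(int r = r0; r < r0 + v0 && r < m; ++r)
                {
                    dst.push_back(src[r * k + c]);
                }
            }
        }
        return dst;
    }
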
diff --git a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
index a737c687c4..0ddf43766f 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,7 +26,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h"
+#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
@@ -91,8 +91,8 @@ const auto b_values = framework::dataset::make("batch_size", 1, 3);
/** Activation values to test */
const auto act_values = framework::dataset::make("Activation",
{
- ActivationLayerInfo(),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
});
/** M0 values to test - Precommit */
@@ -323,6 +323,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyNative3DFixture<float>, f
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}
+
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
TEST_SUITE_END() // GEMMMatrixMulipltyNative
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp
index 6f368a9650..b06e4bf213 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,9 +26,9 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
@@ -110,6 +110,7 @@ const auto b_values = framework::dataset::make("batch_size", 2, 3);
const auto act_values = framework::dataset::make("Activation",
{
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
});
/** Alpha values to test - Precommit */
@@ -328,7 +329,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi
framework::dataset::make("Expected", { true, true, false, false, false, true, true,true})),
input0_info ,input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected)
{
- ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
+ ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
&input1_info.clone()->set_is_resizable(true),
&input2_info.clone()->set_is_resizable(true),
&output_info.clone()->set_is_resizable(true),1.f,1.f,
@@ -336,6 +337,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi
rhs_info,
gemm_info)) == expected, framework::LogLevel::ERRORS);
}
+
TEST_SUITE(Float)
TEST_SUITE(FP32)
@@ -361,7 +363,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<float>, fra
act_values))
{
// Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::DISABLED,
@@ -386,7 +396,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<float>, fra
act_values))
{
// Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::ALL,
@@ -411,7 +429,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>,
act_values))
{
// Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::DISABLED,
@@ -436,8 +462,17 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>,
act_values))
{
// Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
+
TEST_SUITE(ExportToCLImage)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
framework::dataset::make("Input0Info", { TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32), // OK or incorrect if cl_khr_image2d_from_buffer not supported
@@ -560,7 +595,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi
true,
true,
false,
- false})),
+ true})),
input0_info ,input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected)
{
ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
@@ -704,6 +739,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>,
framework::ARM_COMPUTE_PRINT_INFO();
}
}
+
TEST_SUITE_END() // ExportToCLImage
TEST_SUITE_END() // FP32
@@ -731,7 +767,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<half>, fram
act_values))
{
// Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::DISABLED,
@@ -756,7 +800,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<half>, fram
act_values))
{
// Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::ALL,
@@ -781,7 +833,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>,
act_values))
{
// Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::DISABLED,
@@ -806,7 +866,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>,
act_values))
{
// Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
TEST_SUITE(ExportToCLImage)
@@ -931,7 +999,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi
true,
true,
false,
- false})),
+ true})),
input0_info ,input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected)
{
ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
@@ -1075,6 +1143,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>,
framework::ARM_COMPUTE_PRINT_INFO();
}
}
+
TEST_SUITE_END() // ExportToCLImage
TEST_SUITE_END() // FP16
@@ -1102,7 +1171,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixtu
act_values))
{
// Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixture<half>, framework::DatasetMode::DISABLED,
@@ -1127,7 +1204,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixtu
act_values))
{
// Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionFixture<half>, framework::DatasetMode::ALL,
@@ -1152,7 +1237,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionF
act_values))
{
// Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionFixture<half>, framework::DatasetMode::DISABLED,
@@ -1177,8 +1270,17 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionF
act_values))
{
// Validate output
- validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
}
+
TEST_SUITE_END() // MixedPrecision
TEST_SUITE_END() // Float
TEST_SUITE_END() // GEMMMatrixMultiplyReshaped
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
index 88e99bcfef..dafc8dc5ec 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,8 +26,8 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
@@ -99,6 +99,7 @@ const auto b_values = framework::dataset::make("batch_size", 2);
const auto act_values = framework::dataset::make("Activation",
{
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 10.f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
});
/** M0 values to test - precommit */
@@ -211,6 +212,7 @@ bool validate_configuration(unsigned int m_value, unsigned int n_value, unsigned
CLGEMMMatrixMultiplyReshapedOnlyRHS gemm;
return bool(gemm.validate(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info));
}
+
} // namespace
TEST_SUITE(CL)
@@ -462,6 +464,7 @@ FIXTURE_DATA_TEST_CASE(RunNightly3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixtur
framework::ARM_COMPUTE_PRINT_INFO();
}
}
+
TEST_SUITE_END() // FP32
TEST_SUITE(FP16)
@@ -590,6 +593,7 @@ FIXTURE_DATA_TEST_CASE(RunNightly3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixtur
framework::ARM_COMPUTE_PRINT_INFO();
}
}
+
TEST_SUITE_END() // FP16
TEST_SUITE_END() // Float
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRhsMMUL.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRhsMMUL.cpp
new file mode 100644
index 0000000000..3b3cf85317
--- /dev/null
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRhsMMUL.cpp
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/CL/Helper.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/GEMMFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using namespace arm_compute::opencl::kernels;
+
+// Create function for ClGemmReshapeRhsMatrixKernel
+using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<ClGemmReshapeRhsMatrixKernel>;
+
+// Create function for ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel
+using CLGEMMMatrixMultiplyReshapedOnlyRhsMMUL = CLSynthetizeOperator<ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel>;
+
+// Fixture for CLGEMMMatrixMultiplyReshapedOnlyRhsMMUL
+template <typename T>
+using CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture = GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRhsMMUL>;
+
+namespace
+{
+// *INDENT-OFF*
+// clang-format off
+RelativeTolerance<float> rel_tolerance_f32(0.001f);
+constexpr float abs_tolerance_f32(0.0001f);
+RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.001f));
+constexpr float abs_tolerance_f16(0.3f);
+
+/** Alpha values to test - Precommit */
+const auto a_values = framework::dataset::make("alpha", {1.0f, 0.75f} );
+
+/** Beta values to test - Precommit */
+const auto beta_values = framework::dataset::make("beta", {0.0f, -0.75f} );
+
+/** M values to test */
+const auto m_values = framework::dataset::make("M", {49});
+
+/** N values to test */
+const auto n_values = framework::dataset::make("N", {257});
+
+/** K values to test */
+/** The test case requires K to be a multiple of 4 */
+const auto k_values = framework::dataset::make("K", {192});
+
+/** Batch size values to test */
+const auto b_values = framework::dataset::make("batch_size", {1, 2});
+
+/** Activation values to test */
+const auto act_values = framework::dataset::make("Activation",
+{
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+});
+
+/** M0 values to test - Precommit */
+const auto m0_values_precommit = framework::dataset::make("M0", { 1, 2, 4 });
+
+/** N0 values to test - Precommit */
+const auto n0_values_precommit = framework::dataset::make("N0", { 4, 8 });
+
+/** K0 values to test - Precommit */
+const auto k0_values_precommit = framework::dataset::make("K0", { 1 });
+
+/** Broadcast bias from vector to matrix */
+const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(GEMMMatrixMultiplyReshapedOnlyRhsMMUL)
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ m_values,
+ n_values),
+ k_values),
+ b_values),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportToCLImage", false)),
+ framework::dataset::make("DataType", DataType::F32)),
+ a_values),
+ beta_values),
+ broadcast_bias_values),
+ act_values))
+{
+ // Validate output
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture<half>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ m_values,
+ n_values),
+ k_values),
+ b_values),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportToCLImage", false)),
+ framework::dataset::make("DataType", DataType::F16)),
+ a_values),
+ beta_values),
+ broadcast_bias_values),
+ act_values))
+{
+ // Validate output
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(ExportToCLImage)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ m_values,
+ n_values),
+ k_values),
+ b_values),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportToCLImage", true)),
+ framework::dataset::make("DataType", DataType::F32)),
+ a_values),
+ beta_values),
+ broadcast_bias_values),
+ act_values))
+{
+ // Validate output
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture<half>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ m_values,
+ n_values),
+ k_values),
+ b_values),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportToCLImage", true)),
+ framework::dataset::make("DataType", DataType::F16)),
+ a_values),
+ beta_values),
+ broadcast_bias_values),
+ act_values))
+{
+ // Validate output
+ if(validate_result)
+ {
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // ExportToCLImage
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // GEMMMatrixMultiplyReshapedOnlyRhsMMUL
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
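
The MMUL fixtures above report "cl_arm_matrix_multiply not supported. TEST skipped" and set validate_result to false when the extension is absent. As a rough sketch of what such a capability check involves - written against the plain OpenCL C API with a hypothetical device_has_extension helper, not the framework's own plumbing:

    #include <CL/cl.h>
    #include <string>
    #include <vector>

    // Returns true if the device advertises the named OpenCL extension,
    // e.g. "cl_arm_matrix_multiply". Error handling omitted for brevity.
    bool device_has_extension(cl_device_id device, const std::string &name)
    {
        size_t size = 0;
        clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, nullptr, &size);
        std::vector<char> buffer(size);
        clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, buffer.data(), nullptr);
        return std::string(buffer.data(), size).find(name) != std::string::npos;
    }

    // Usage sketch: guard the MMUL test body.
    // if(!device_has_extension(dev, "cl_arm_matrix_multiply")) { /* skip and report */ }
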
diff --git a/tests/validation/CL/GEMMReshapeLHSMatrix.cpp b/tests/validation/CL/GEMMReshapeLHSMatrix.cpp
index f995608308..0dd9b811f6 100644
--- a/tests/validation/CL/GEMMReshapeLHSMatrix.cpp
+++ b/tests/validation/CL/GEMMReshapeLHSMatrix.cpp
@@ -25,7 +25,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
@@ -66,8 +66,10 @@ const auto b_values = framework::dataset::make("batchsize", 1, 3);
/** M0 values to test */
const auto m0_values_s32 = framework::dataset::make("M0", { 2, 3 });
-const auto m0_values_s16 = framework::dataset::make("M0", { 4, 5 });
-const auto m0_values_s8 = framework::dataset::make("M0", { 6, 7, 8 });
+const auto m0_values_s16 = framework::dataset::make("M0", { 4 });
+const auto m0_values_s16_nt = framework::dataset::make("M0", { 5 });
+const auto m0_values_s8_nt = framework::dataset::make("M0", { 6, 7 });
+const auto m0_values_s8 = framework::dataset::make("M0", { 8 });
/** K0 values to test */
const auto k0_values_s32 = framework::dataset::make("K0", { 2, 3 });
@@ -101,6 +103,7 @@ FIXTURE_DATA_TEST_CASE(S32, CLGEMMReshapeLHSMatrixFixture<int>, framework::Datas
// Validate output
validate(CLAccessor(_target), _reference);
}
+
FIXTURE_DATA_TEST_CASE(S16, CLGEMMReshapeLHSMatrixFixture<short>, framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
b_values),
@@ -114,6 +117,7 @@ FIXTURE_DATA_TEST_CASE(S16, CLGEMMReshapeLHSMatrixFixture<short>, framework::Dat
// Validate output
validate(CLAccessor(_target), _reference);
}
+
FIXTURE_DATA_TEST_CASE(S8, CLGEMMReshapeLHSMatrixFixture<char>, framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
b_values),
@@ -128,6 +132,37 @@ FIXTURE_DATA_TEST_CASE(S8, CLGEMMReshapeLHSMatrixFixture<char>, framework::Datas
validate(CLAccessor(_target), _reference);
}
+TEST_SUITE(NotTransposed)
+FIXTURE_DATA_TEST_CASE(S16, CLGEMMReshapeLHSMatrixFixture<short>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
+ b_values),
+ framework::dataset::make("DataType", DataType::S16)),
+ m0_values_s16_nt),
+ k0_values_s16),
+ v0_values),
+ i_values),
+ framework::dataset::make("transpose", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(S8, CLGEMMReshapeLHSMatrixFixture<char>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
+ b_values),
+ framework::dataset::make("DataType", DataType::S8)),
+ m0_values_s8_nt),
+ k0_values_s8),
+ v0_values),
+ i_values),
+ framework::dataset::make("transpose", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE_END() // NotTransposed
+
TEST_SUITE(ReinterpretInputAs3D)
FIXTURE_DATA_TEST_CASE(S32, CLGEMMReshapeLHSMatrix3DFixture<int>, framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape3DShapes(),
diff --git a/tests/validation/CL/GEMMReshapeRHSMatrix.cpp b/tests/validation/CL/GEMMReshapeRHSMatrix.cpp
index ff1240ea2e..f8462058a6 100644
--- a/tests/validation/CL/GEMMReshapeRHSMatrix.cpp
+++ b/tests/validation/CL/GEMMReshapeRHSMatrix.cpp
@@ -25,7 +25,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
diff --git a/tests/validation/CL/Gather.cpp b/tests/validation/CL/Gather.cpp
index f0b87d7d9f..7619baae1e 100644
--- a/tests/validation/CL/Gather.cpp
+++ b/tests/validation/CL/Gather.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,19 +48,21 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 27U), 1, DataType::F16),
TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),
TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),
- TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid Indices data type
- TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid Indices dimensionality
- TensorInfo(TensorShape(5U, 5U, 5U, 5U, 5U), 1, DataType::F32), // Invalid Input dimensionality
- TensorInfo(TensorShape(27U, 27U), 1, DataType::F16), // Mismatching data type input/output
- TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid positive axis value
- TensorInfo(TensorShape(27U, 27U), 1, DataType::F16), // Invalid negative axis value
+ TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid Output shape
+ TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid Indices data type
+ TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid Indices dimensionality
+ TensorInfo(TensorShape(5U, 5U, 5U, 5U, 5U), 1, DataType::F32), // Invalid Input dimensionality
+ TensorInfo(TensorShape(27U, 27U), 1, DataType::F16), // Mismatching data type input/output
+ TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid positive axis value
+ TensorInfo(TensorShape(27U, 27U), 1, DataType::F16), // Invalid negative axis value
}),
framework::dataset::make("IndicesInfo", {
TensorInfo(TensorShape(10U), 1, DataType::U32),
TensorInfo(TensorShape(10U), 1, DataType::U32),
TensorInfo(TensorShape(10U), 1, DataType::U32),
- TensorInfo(TensorShape(10U), 1, DataType::U8),
TensorInfo(TensorShape(10U, 10U), 1, DataType::U32),
+ TensorInfo(TensorShape(10U), 1, DataType::U8),
+ TensorInfo(TensorShape(10U, 10U, 10U, 10U), 1, DataType::U32),
TensorInfo(TensorShape(10U), 1, DataType::U32),
TensorInfo(TensorShape(10U), 1, DataType::U32),
TensorInfo(TensorShape(10U), 1, DataType::U32),
@@ -71,7 +73,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
TensorInfo(TensorShape(27U, 10U), 1, DataType::F32),
TensorInfo(TensorShape(10U, 27U), 1, DataType::F32),
TensorInfo(TensorShape(10U, 27U), 1, DataType::F32),
- TensorInfo(TensorShape(27U, 10U), 1, DataType::F32),
+ TensorInfo(TensorShape(10U, 27U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 10U, 10U, 10U, 10U), 1, DataType::F32),
TensorInfo(TensorShape(10U, 5U, 5U, 5U, 5U), 1, DataType::F32),
TensorInfo(TensorShape(27U, 10U), 1, DataType::F32),
TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),
@@ -82,13 +85,14 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
1,
-2,
0,
+ 0,
1,
0,
1,
2,
-3,
})),
- framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, false })),
+ framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, false, false })),
input_info, indices_info, output_info, axis, expected)
{
const Status status = CLGather::validate(&input_info.clone()->set_is_resizable(true), &indices_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), axis);
@@ -111,6 +115,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
validate(CLAccessor(_target), _reference);
}
+FIXTURE_DATA_TEST_CASE(RunSmallMultiDimIndices,
+ CLGatherFixture<half>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::CLSmallGatherMultiDimIndicesDataset(), framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
FIXTURE_DATA_TEST_CASE(RunLarge,
CLGatherFixture<half>,
framework::DatasetMode::NIGHTLY,
@@ -131,6 +144,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
validate(CLAccessor(_target), _reference);
}
+FIXTURE_DATA_TEST_CASE(RunSmallMultiDimIndices,
+ CLGatherFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::CLSmallGatherMultiDimIndicesDataset(), framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
FIXTURE_DATA_TEST_CASE(RunLarge,
CLGatherFixture<float>,
framework::DatasetMode::NIGHTLY,
@@ -152,6 +174,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
validate(CLAccessor(_target), _reference);
}
+FIXTURE_DATA_TEST_CASE(RunSmallMultiDimIndices,
+ CLGatherFixture<uint8_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::CLSmallGatherMultiDimIndicesDataset(), framework::dataset::make("DataType", DataType::U8)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
FIXTURE_DATA_TEST_CASE(RunLarge,
CLGatherFixture<uint8_t>,
framework::DatasetMode::NIGHTLY,
@@ -172,6 +203,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
validate(CLAccessor(_target), _reference);
}
+FIXTURE_DATA_TEST_CASE(RunSmallMultiDimIndices,
+ CLGatherFixture<uint16_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::CLSmallGatherMultiDimIndicesDataset(), framework::dataset::make("DataType", DataType::U16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+
FIXTURE_DATA_TEST_CASE(RunLarge,
CLGatherFixture<uint16_t>,
framework::DatasetMode::NIGHTLY,
diff --git a/tests/validation/CL/Im2Col.cpp b/tests/validation/CL/Im2Col.cpp
index c6006efcba..1f5b781690 100644
--- a/tests/validation/CL/Im2Col.cpp
+++ b/tests/validation/CL/Im2Col.cpp
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
-#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/gpu/cl/kernels/ClIm2ColKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/framework/Asserts.h"
@@ -40,7 +40,7 @@ namespace validation
TEST_SUITE(CL)
TEST_SUITE(Im2Col)
-using CLIm2Col = CLSynthetizeFunction<CLIm2ColKernel>;
+using ClIm2Col = ClSynthetizeOperatorWithBorder<opencl::kernels::ClIm2ColKernel>;
/** Negative tests
*
@@ -63,7 +63,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
const auto output = TensorInfo(TensorShape(9U, 10U, 12U, 2U), 1, DataType::F32);
const auto conv_size = Size2D(3, 3);
const bool has_bias = false;
- const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
+ const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
}
@@ -73,7 +73,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
const auto output = TensorInfo(TensorShape(9U, 80U, 2U), 1, DataType::QASYMM8);
const auto conv_size = Size2D(3, 3);
const bool has_bias = true;
- const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
+ const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
}
@@ -84,7 +84,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
const auto conv_size = Size2D(3, 3);
const auto dilation = Size2D(0, 1);
const bool has_bias = false;
- const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation);
+ const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation);
ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
}
@@ -96,7 +96,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
const auto dilation = Size2D(1, 1);
const bool has_bias = false;
const unsigned int num_groups = 2;
- const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation, num_groups);
+ const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation, num_groups);
ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
}
@@ -108,7 +108,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
const auto dilation = Size2D(1, 1);
const bool has_bias = false;
const unsigned int num_groups = 2;
- const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation, num_groups);
+ const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation, num_groups);
ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
}
@@ -118,7 +118,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
const auto output = TensorInfo(TensorShape(9U, 81U, 2U), 1, DataType::F32);
const auto conv_size = Size2D(3, 3);
const bool has_bias = false;
- const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
+ const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
}
@@ -128,13 +128,13 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
const auto output = TensorInfo(TensorShape(1U, 1U, 1U, 2U), 1, DataType::F32, DataLayout::NHWC);
const auto conv_size = Size2D(9, 9);
const bool has_bias = false;
- const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
+ const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
}
}
template <typename T>
-using CLIm2ColFixture = Im2ColValidationFixture<CLTensor, CLAccessor, CLIm2Col, T, true>;
+using ClIm2ColFixture = Im2ColOpValidationFixture<CLTensor, CLAccessor, ClIm2Col, T, true>;
TEST_SUITE(NHWC)
@@ -150,7 +150,7 @@ TEST_SUITE(NHWC)
* Kernel tested im2col3x3_nhwc
*/
FIXTURE_DATA_TEST_CASE(W3x3,
- CLIm2ColFixture<float>,
+ ClIm2ColFixture<float>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(
framework::dataset::make("InputShape",
@@ -180,7 +180,7 @@ framework::dataset::make("Groups", 1)))
* Kernel tested im2col9x9_nhwc
*/
FIXTURE_DATA_TEST_CASE(W9x9,
- CLIm2ColFixture<float>,
+ ClIm2ColFixture<float>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(
framework::dataset::make("InputShape",
@@ -210,7 +210,7 @@ framework::dataset::make("Groups", 1)))
* Kernel tested im2col_generic_nhwc
*/
FIXTURE_DATA_TEST_CASE(Generic,
- CLIm2ColFixture<float>,
+ ClIm2ColFixture<float>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(
framework::dataset::make("InputShape",
@@ -243,7 +243,7 @@ TEST_SUITE(NCHW)
* Kernel tested im2col1x1_stridex1_nchw
*/
FIXTURE_DATA_TEST_CASE(W1x1_Stride1_NoPad,
- CLIm2ColFixture<float>,
+ ClIm2ColFixture<float>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(
framework::dataset::make("InputShape", { TensorShape(4U, 4U, 3U, 2U), TensorShape(5U, 4U, 3U, 2U), TensorShape(3U, 4U, 3U, 2U) }),
@@ -267,7 +267,7 @@ FIXTURE_DATA_TEST_CASE(W1x1_Stride1_NoPad,
* Kernel tested im2col3x3_nchw
*/
FIXTURE_DATA_TEST_CASE(W3x3,
- CLIm2ColFixture<float>,
+ ClIm2ColFixture<float>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(
framework::dataset::make("InputShape", TensorShape(4U, 4U, 3U, 2U)),
@@ -291,7 +291,7 @@ FIXTURE_DATA_TEST_CASE(W3x3,
* Kernel tested im2col5x5_nchw
*/
FIXTURE_DATA_TEST_CASE(W5x5,
- CLIm2ColFixture<float>,
+ ClIm2ColFixture<float>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(
framework::dataset::make("InputShape", TensorShape(7U, 4U, 3U, 2U)),
@@ -317,7 +317,7 @@ FIXTURE_DATA_TEST_CASE(W5x5,
* Kernel tested im2col11x11_padx0_pady0_nchw
*/
FIXTURE_DATA_TEST_CASE(W11x11_NoPad,
- CLIm2ColFixture<float>,
+ ClIm2ColFixture<float>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(
framework::dataset::make("InputShape", { TensorShape(11U, 11U, 2U, 2U), TensorShape(14U, 13U, 1U, 2U) }),
@@ -341,7 +341,7 @@ FIXTURE_DATA_TEST_CASE(W11x11_NoPad,
* Kernel tested im2col_generic_padx0_pady0_nchw
*/
FIXTURE_DATA_TEST_CASE(GenericZeroPad,
- CLIm2ColFixture<float>,
+ ClIm2ColFixture<float>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(
framework::dataset::make("InputShape", TensorShape(13U, 11U, 2U, 2U)),
@@ -367,7 +367,7 @@ TEST_SUITE_END() // NCHW
* Kernel tested im2col_generic_(nchw|nhwc)
*/
FIXTURE_DATA_TEST_CASE(Generic,
- CLIm2ColFixture<float>,
+ ClIm2ColFixture<float>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(
framework::dataset::make("InputShape", TensorShape(13U, 11U, 5U, 2U)),
@@ -393,7 +393,7 @@ FIXTURE_DATA_TEST_CASE(Generic,
* - im2col9x9_nhwc
*/
FIXTURE_DATA_TEST_CASE(Quantized,
- CLIm2ColFixture<uint8_t>,
+ ClIm2ColFixture<uint8_t>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(
framework::dataset::make("InputShape", TensorShape(13U, 11U, 11U, 2U)),
@@ -419,7 +419,7 @@ FIXTURE_DATA_TEST_CASE(Quantized,
* - im2col9x9_nhwc
*/
FIXTURE_DATA_TEST_CASE(FP16,
- CLIm2ColFixture<half>,
+ ClIm2ColFixture<half>,
framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(
framework::dataset::make("InputShape", TensorShape(13U, 11U, 11U, 2U)),
diff --git a/tests/validation/CL/IndirectConv2dAddressPrecalculation.cpp b/tests/validation/CL/IndirectConv2dAddressPrecalculation.cpp
new file mode 100644
index 0000000000..67f70685d1
--- /dev/null
+++ b/tests/validation/CL/IndirectConv2dAddressPrecalculation.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/gpu/cl/kernels/ClIndirectConv2dAddressPrecalculationKernel.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/CL/Helper.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/IndirectConv2dAddressPrecalculationFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using namespace arm_compute::misc::shape_calculator;
+using namespace arm_compute::opencl::kernels;
+
+using CLIndirectConv2dAddressPrecalculation = CLSynthetizeOperator<ClIndirectConv2dAddressPrecalculationKernel>;
+
+using CLIndirectConv2dAddressPrecalculationFixture = IndirectConv2dAddressPrecalculationValidationFixture<CLTensor, CLAccessor, CLIndirectConv2dAddressPrecalculation>;
+
+// *INDENT-OFF*
+// clang-format off
+/** Data types */
+
+namespace
+{
+const auto src_w_values = framework::dataset::make("src_w", {91});
+const auto src_h_values = framework::dataset::make("src_h", {103});
+const auto src_b_values = framework::dataset::make("src_b", {1, 2});
+const auto wei_w_values = framework::dataset::make("wei_w", {3, 5});
+const auto wei_h_values = framework::dataset::make("wei_h", {1, 6});
+const auto pad_values = framework::dataset::make("pad", {1, 2, 3});
+const auto stride_values = framework::dataset::make("stride", {1, 2});
+const auto m0_values = framework::dataset::make("M0", { 1, 2, 4, 5, 7 });
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(IndirectConv2dAddressPrecalculation)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLIndirectConv2dAddressPrecalculationFixture, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(src_w_values,
+ src_h_values),
+ src_b_values),
+ wei_w_values),
+ wei_h_values),
+ pad_values),
+ stride_values),
+ m0_values))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE_END() // IndirectConv2dAddressPrecalculation
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/IndirectConvolutionLayer.cpp b/tests/validation/CL/IndirectConvolutionLayer.cpp
new file mode 100644
index 0000000000..aedf070e6b
--- /dev/null
+++ b/tests/validation/CL/IndirectConvolutionLayer.cpp
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLIndirectConvolutionLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DirectConvolutionLayerFixture.h"
+
+// Note: Since the interface of indirect convolution is the same as that of direct convolution, we can reuse
+// the direct convolution fixture
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<half> tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */
+RelativeTolerance<float> tolerance_fp32(0.05f); /**< Tolerance for floating point tests */
+constexpr float abs_tolerance_f32(0.0001f); /**< Absolute tolerance for FP32 tests*/
+constexpr float tolerance_num = 0.07f; /**< Tolerance number */
+
+/** Activation function Dataset*/
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+{ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f) });
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(IndirectConvolutionLayer)
+
+/** Check whether the configuration of an indirect convolution layer with no
+ * bias leads to a successful run.
+ */
+TEST_CASE(NoBias, framework::DatasetMode::PRECOMMIT)
+{
+ const TensorShape src_shape_nhwc = TensorShape(8U, 27U, 13U);
+ const TensorShape wei_shape_nhwc = TensorShape(8U, 3U, 3U, 4U);
+ const TensorShape bia_shape = TensorShape(4U);
+ const TensorShape dst_shape_nhwc = TensorShape(4U, 25U, 11U);
+ constexpr DataType dt = DataType::F32;
+ constexpr DataLayout data_layout = DataLayout::NHWC;
+
+ auto src_nhwc = create_tensor<CLTensor>(src_shape_nhwc, dt, 1, QuantizationInfo(), data_layout);
+ auto wei_nhwc = create_tensor<CLTensor>(wei_shape_nhwc, dt, 1, QuantizationInfo(), data_layout);
+ auto dst_nhwc = create_tensor<CLTensor>(dst_shape_nhwc, dt, 1, QuantizationInfo(), data_layout);
+
+ TensorShape src_shape_nchw = src_shape_nhwc;
+ TensorShape wei_shape_nchw = wei_shape_nhwc;
+ TensorShape dst_shape_nchw = dst_shape_nhwc;
+
+ permute(src_shape_nchw, PermutationVector(1U, 2U, 0U));
+ permute(wei_shape_nchw, PermutationVector(1U, 2U, 0U, 3U));
+ permute(dst_shape_nchw, PermutationVector(1U, 2U, 0U));
+
+ const PadStrideInfo conv_info = PadStrideInfo(1, 1, 0, 0);
+
+ // Create indirect Convolution function
+ CLIndirectConvolutionLayer conv{};
+ conv.configure(&src_nhwc, &wei_nhwc, nullptr, &dst_nhwc, conv_info);
+
+ src_nhwc.allocator()->allocate();
+ wei_nhwc.allocator()->allocate();
+ dst_nhwc.allocator()->allocate();
+
+ library->fill_tensor_value(CLAccessor(src_nhwc), 1.f);
+ library->fill_tensor_value(CLAccessor(wei_nhwc), 1.f);
+
+ conv.run();
+
+ // Compute reference to compare
+ SimpleTensor<float> ref_src{ src_shape_nchw, dt };
+ SimpleTensor<float> ref_wei{ wei_shape_nchw, dt };
+ SimpleTensor<float> ref_bia{ bia_shape, dt };
+ library->fill_tensor_value(ref_src, 1.f);
+ library->fill_tensor_value(ref_wei, 1.f);
+ // No bias
+ library->fill_tensor_value(ref_bia, 0.f);
+ auto ref_dst = reference::convolution_layer<float>(ref_src, ref_wei, ref_bia, dst_shape_nchw, conv_info);
+
+ validate(CLAccessor(dst_nhwc), ref_dst);
+}
+
+/** Check whether the case of rectangular kernels, i.e. when the width and height of the weight shape are not equal,
+ * leads to a successful run.
+ */
+TEST_CASE(NonSquareKernel, framework::DatasetMode::PRECOMMIT)
+{
+ const TensorShape src_shape_nhwc = TensorShape(3U, 33U, 27U);
+ const TensorShape wei_shape_nhwc = TensorShape(3U, 5U, 7U, 4U); // non-square kernel
+ const TensorShape bia_shape = TensorShape(4U);
+ const TensorShape dst_shape_nhwc = TensorShape(4U, 11U, 12U);
+ constexpr DataType dt = DataType::F32;
+ constexpr DataLayout data_layout = DataLayout::NHWC;
+
+ auto src_nhwc = create_tensor<CLTensor>(src_shape_nhwc, dt, 1, QuantizationInfo(), data_layout);
+ auto wei_nhwc = create_tensor<CLTensor>(wei_shape_nhwc, dt, 1, QuantizationInfo(), data_layout);
+ auto dst_nhwc = create_tensor<CLTensor>(dst_shape_nhwc, dt, 1, QuantizationInfo(), data_layout);
+
+ TensorShape src_shape_nchw = src_shape_nhwc;
+ TensorShape wei_shape_nchw = wei_shape_nhwc;
+ TensorShape dst_shape_nchw = dst_shape_nhwc;
+
+ permute(src_shape_nchw, PermutationVector(1U, 2U, 0U));
+ permute(wei_shape_nchw, PermutationVector(1U, 2U, 0U, 3U));
+ permute(dst_shape_nchw, PermutationVector(1U, 2U, 0U));
+
+ const PadStrideInfo conv_info = PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR);
+
+ // Create indirect convolution function
+ CLIndirectConvolutionLayer conv{};
+ conv.configure(&src_nhwc, &wei_nhwc, nullptr, &dst_nhwc, conv_info);
+
+ src_nhwc.allocator()->allocate();
+ wei_nhwc.allocator()->allocate();
+ dst_nhwc.allocator()->allocate();
+
+ library->fill_tensor_value(CLAccessor(src_nhwc), 1.f);
+ library->fill_tensor_value(CLAccessor(wei_nhwc), 1.f);
+
+ conv.run();
+
+ // Compute reference to compare
+ SimpleTensor<float> ref_src{ src_shape_nchw, dt };
+ SimpleTensor<float> ref_wei{ wei_shape_nchw, dt };
+ SimpleTensor<float> ref_bia{ bia_shape, dt };
+ library->fill_tensor_value(ref_src, 1.f);
+ library->fill_tensor_value(ref_wei, 1.f);
+ // No bias
+ library->fill_tensor_value(ref_bia, 0.f);
+ auto ref_dst = reference::convolution_layer<float>(ref_src, ref_wei, ref_bia, dst_shape_nchw, conv_info);
+
+ validate(CLAccessor(dst_nhwc), ref_dst);
+}
+// *INDENT-OFF*
+// clang-format off
+// Note: Since the interface of indirect convolution is the same as that of direct convolution, we can reuse
+// the direct convolution fixture
+template <typename T>
+using CLIndirectConvolutionLayerFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLIndirectConvolutionLayer, T>;
+template <typename T>
+using CLIndirectConvolutionLayerMixedDataLayoutFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLIndirectConvolutionLayer, T, true>;
+
+TEST_SUITE(NHWC)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLIndirectConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+ TensorShape(19U, 5U, 16U, 4U),
+ TensorShape(13U, 5U, 17U, 2U),
+ TensorShape(32U, 37U, 13U) } ),
+ framework::dataset::make("StrideX", { 1, 3, 1, 1 })),
+ framework::dataset::make("StrideY", { 1, 3, 2, 1 })),
+ framework::dataset::make("PadX", { 1, 3, 0, 4 })),
+ framework::dataset::make("PadY", { 1, 3, 0, 4 })),
+ framework::dataset::make("KernelSize", { 3, 8, 1, 9 })),
+ framework::dataset::make("NumKernels", { 17, 3, 1, 19 })),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLIndirectConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ),
+ framework::dataset::make("StrideX", { 1 })),
+ framework::dataset::make("StrideY", { 1 })),
+ framework::dataset::make("PadX", { 1 })),
+ framework::dataset::make("PadY", { 1 })),
+ framework::dataset::make("KernelSize", { 9 })),
+ framework::dataset::make("NumKernels", { 3 })),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::IDENTITY) )),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLIndirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+ TensorShape(19U, 5U, 16U, 4U),
+ TensorShape(13U, 5U, 17U, 2U),
+ TensorShape(32U, 37U, 13U) } ),
+ framework::dataset::make("StrideX", { 1, 3, 1, 1 })),
+ framework::dataset::make("StrideY", { 1, 3, 2, 1 })),
+ framework::dataset::make("PadX", { 1, 3, 0, 4 })),
+ framework::dataset::make("PadY", { 1, 3, 0, 4 })),
+ framework::dataset::make("KernelSize", { 3, 8, 1, 9 })),
+ framework::dataset::make("NumKernels", { 17, 3, 1, 19 })),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLIndirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+ TensorShape(19U, 5U, 16U, 4U),
+ TensorShape(13U, 5U, 17U, 2U),
+ TensorShape(32U, 37U, 13U) } ),
+ framework::dataset::make("StrideX", { 1 })),
+ framework::dataset::make("StrideY", { 2 })),
+ framework::dataset::make("PadX", { 1 })),
+ framework::dataset::make("PadY", { 3 })),
+ framework::dataset::make("KernelSize", { 3 })),
+ framework::dataset::make("NumKernels", { 3 })),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLIndirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ),
+ framework::dataset::make("StrideX", { 1 })),
+ framework::dataset::make("StrideY", { 1 })),
+ framework::dataset::make("PadX", { 1 })),
+ framework::dataset::make("PadY", { 1 })),
+ framework::dataset::make("KernelSize", { 9 })),
+ framework::dataset::make("NumKernels", { 3 })),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::IDENTITY) )),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // NHWC
+TEST_SUITE_END() // IndirectConvolutionLayer
+TEST_SUITE_END() // CL
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
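
The destination shapes hard-coded in the NoBias and NonSquareKernel cases above follow the usual convolution output-size rule. A small stand-alone check against the NonSquareKernel numbers (a sketch, not library code; PadStrideInfo(3, 2, 1, 1, 2, 0, FLOOR) is read here as stride 3x2, left/right padding 1/1, top/bottom padding 2/0):

    // out = floor((in + pad_before + pad_after - kernel) / stride) + 1
    constexpr int conv_out_dim(int in, int pad_before, int pad_after, int kernel, int stride)
    {
        return (in + pad_before + pad_after - kernel) / stride + 1;
    }

    // dst_shape_nhwc = (4U, 11U, 12U): dim1 is the width, dim2 the height.
    static_assert(conv_out_dim(33, 1, 1, 5, 3) == 11, "dst width in NonSquareKernel");
    static_assert(conv_out_dim(27, 2, 0, 7, 2) == 12, "dst height in NonSquareKernel");
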
diff --git a/tests/validation/CL/MatMul.cpp b/tests/validation/CL/MatMul.cpp
new file mode 100644
index 0000000000..844597f3e9
--- /dev/null
+++ b/tests/validation/CL/MatMul.cpp
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLMatMul.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ActivationFunctionsDataset.h"
+#include "tests/framework/DatasetModes.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/TestCase.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+
+#include "tests/datasets/LargeMatMulDataset.h"
+#include "tests/datasets/SmallMatMulDataset.h"
+#include "tests/validation/fixtures/MatMulFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */
+constexpr float abs_tolerance_f32(
+    0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp32 data type, used when the relative tolerance check fails because of small values */
+constexpr float abs_tolerance_f16(
+    0.001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16 data type, used when the relative tolerance check fails because of small values */
+RelativeTolerance<half_float::half> tolerance_f16(half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */
+constexpr AbsoluteTolerance<uint8_t> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+} // namespace
+
+template <typename T>
+using CLMatMulFixture = MatMulValidationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+template <typename T>
+using CLQuantizedMatMulFixture = QuantizedMatMulValidationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+template <typename T>
+using CLMatMulActivationFixture = MatMulValidationWithActivationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+template <typename T>
+using CLMatMulActivationAlphaBetaFixture = MatMulValidationWithActivationAlphaBetaFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+template <typename T>
+using CLQuantizedMatMulActivationFixture = QuantizedMatMulValidationWithActivationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+/* The main activation functions the matmul (float) operator is expected to support */
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f),
+});
+
+/* (Float datatype only) Larger activation functions dataset, used during some nightly tests. */
+const auto AllActivationsDataset = combine(datasets::ActivationFunctions(), framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
+
+// Alpha and beta values should be integer values
+// This restriction is for testing purposes with quantized data types and is not a limitation of the kernel.
+// To properly remove it, dst_qinfo should be auto-initialised with consideration for the alpha and beta values.
+// The main activation functions the quantized matmul kernels are expected to support (see the note after this dataset)
+const auto ActivationFunctionsQuantizedDataset = concat(concat(concat(
+ framework::dataset::make("ActivationInfo", ActivationLayerInfo()),
+ framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))),
+ framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 1.f))),
+ framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 2.f, 1.f)));
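+// Illustrative note on the integer alpha/beta restriction above: with asymmetric quantization a real value r
+// maps to q = round(r / scale) + offset, so integer clamping bounds combined with the unit-scale DstQInfo
+// values used in the fixtures below keep the clamp boundaries exactly representable in the quantized domain.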
+
+TEST_SUITE(CL)
+TEST_SUITE(MatMul)
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulActivationFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ framework::dataset::make("DataType", DataType::F32)),
+ ActivationFunctionsDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLMatMulActivationFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ framework::dataset::make("DataType", DataType::F32)),
+ ActivationFunctionsDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunAllActivations, CLMatMulActivationAlphaBetaFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::SmallerMatMulDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { true })),
+ framework::dataset::make("DataType", DataType::F32)),
+ AllActivationsDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulActivationFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ framework::dataset::make("DataType", DataType::F16)),
+ ActivationFunctionsDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLMatMulActivationFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ framework::dataset::make("DataType", DataType::F16)),
+ ActivationFunctionsDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLQuantizedMatMulFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(
+ datasets::SmallMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ ActivationFunctionsQuantizedDataset),
+ framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+ framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 50, 1) })),
+ framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 30, -1) })),
+ framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLQuantizedMatMulFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(
+ datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ ActivationFunctionsQuantizedDataset),
+ framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+ framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 100, 1) })),
+ framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 200, -1) })),
+ framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLQuantizedMatMulFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(
+ datasets::SmallMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ ActivationFunctionsQuantizedDataset),
+ framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+ framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 50, 1) })),
+ framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 30, -1) })),
+ framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLQuantizedMatMulFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(
+ datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ ActivationFunctionsQuantizedDataset),
+ framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+ framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 100, 1) })),
+ framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 200, -1) })),
+ framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 50) })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE_END() // Quantized
+
+TEST_SUITE_END() // MatMul
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/MatMulKernel.cpp b/tests/validation/CL/MatMulKernel.cpp
new file mode 100644
index 0000000000..b47f8bc924
--- /dev/null
+++ b/tests/validation/CL/MatMulKernel.cpp
@@ -0,0 +1,650 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
+#include "tests/datasets/LargeMatMulDataset.h"
+#include "tests/datasets/SmallMatMulDataset.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/MatMulKernelFixture.h"
+#include "tests/validation/reference/Permute.h"
+
+#include <tuple>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */
+constexpr float abs_tolerance_f32(
+    0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp32 data type, used when the relative tolerance check fails because of small values */
+constexpr float abs_tolerance_f16(
+    0.001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16 data type, used when the relative tolerance check fails because of small values */
+RelativeTolerance<half_float::half> tolerance_f16(half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */
+} // namespace
+
+/** M0 values to test --precommit*/
+const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 });
+
+/** N0 values to test --precommit*/
+const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 });
+
+/** K0 values to test --precommit*/
+const auto k0_values_precommit = framework::dataset::make("K0", { 2, 3 });
+
+/** M0 values to test --nightly*/
+const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 1, 2, 3, 4, 5, 6, 7, 8 });
+const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", { 1, 2, 3, 4, 8 });
+
+/** N0 values to test --nightly*/
+const auto n0_values_nightly_rhs_nt = framework::dataset::make("N0", { 1, 2, 3, 4, 8, 16 });
+const auto n0_values_nightly_rhs_t = framework::dataset::make("N0", { 1, 2, 3, 4, 8 });
+
+/** K0 values to test --nightly*/
+const auto k0_values_nightly_lhs_nt_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 8, 16 });
+const auto k0_values_nightly_rhs_t = framework::dataset::make("K0", { 1, 2, 3, 4, 8 });
+const auto k0_values_nightly_lhs_t_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 5, 6, 7, 8 });
+
+template <typename T>
+using CLMatMulKernelFixture = MatMulKernelValidationFixture<T, ClMatMulNativeKernel>;
+
+template <typename T>
+using CLMatMulKernelBiasFixture = MatMulKernelWithBiasValidation<T, ClMatMulNativeKernel>;
+
+TEST_SUITE(CL)
+TEST_SUITE(MatMulKernel)
+TEST_SUITE(Validate)
+
+TEST_CASE(SupportedBlockSizes, framework::DatasetMode::ALL)
+{
+ using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>;
+
+ const std::vector<MatMulConfigurationPair> supported_block_sizes =
+ {
+ // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false)
+ // Lhs not-transposed, Rhs-not-transposed
+ { MatMulKernelInfo(false, false, 0, 1, 1), false }, // M0 should be > 0
+ { MatMulKernelInfo(false, false, 3, 5, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, false, 3, 6, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, false, 3, 3, 17), false }, // K0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, false, 3, 3, 7), false }, // K0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, false, 9, 1, 2), true },
+ { MatMulKernelInfo(false, false, 3, 16, 3), true },
+ { MatMulKernelInfo(false, false, 7, 3, 4), true },
+ { MatMulKernelInfo(false, false, 7, 3, 4, true), false }, // N0 not in {4, 8, 16}
+ { MatMulKernelInfo(false, false, 7, 1, 4, true), false }, // N0 not in {4, 8, 16}
+ { MatMulKernelInfo(false, false, 7, 12, 4, true), false }, // N0 not in {4, 8, 16}
+ { MatMulKernelInfo(false, false, 7, 4, 4, true), true },
+ { MatMulKernelInfo(false, false, 7, 8, 4, true), true },
+ { MatMulKernelInfo(false, false, 7, 16, 4, true), true },
+
+ // Lhs not-transposed, Rhs transposed
+ { MatMulKernelInfo(false, true, 0, 1, 1), false }, // M0 should be > 0
+ { MatMulKernelInfo(false, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, true, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, true, 3, 3, 12), false }, // K0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, true, 3, 3, 6), false }, // K0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, true, 5, 1, 2), true },
+ { MatMulKernelInfo(false, true, 3, 3, 3), true },
+ { MatMulKernelInfo(false, true, 2, 4, 8), true },
+ { MatMulKernelInfo(false, true, 2, 4, 5, true), false }, // K0 not in {4, 8, 16}
+ { MatMulKernelInfo(false, true, 2, 4, 9, true), false }, // K0 not in {4, 8, 16}
+ { MatMulKernelInfo(false, true, 2, 4, 3, true), false }, // K0 not in {4, 8, 16}
+ { MatMulKernelInfo(false, true, 2, 4, 4, true), true },
+ { MatMulKernelInfo(false, true, 2, 4, 8, true), true },
+ { MatMulKernelInfo(false, true, 2, 8, 16, true), true },
+
+ // Lhs transposed, Rhs-not-transposed
+ { MatMulKernelInfo(true, false, 1, 1, 0), false }, // K0 should be > 0
+ { MatMulKernelInfo(true, false, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, false, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, false, 6, 3, 12), false }, // M0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, false, 5, 3, 6), false }, // M0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, false, 4, 1, 22), true },
+ { MatMulKernelInfo(true, false, 3, 3, 3), true },
+ { MatMulKernelInfo(true, false, 2, 4, 8), true },
+ { MatMulKernelInfo(true, false, 2, 3, 8, true), false }, // N0 not in {4, 8, 16}
+ { MatMulKernelInfo(true, false, 2, 7, 8, true), false }, // N0 not in {4, 8, 16}
+ { MatMulKernelInfo(true, false, 2, 5, 8, true), false }, // N0 not in {4, 8, 16}
+ { MatMulKernelInfo(true, false, 2, 4, 8, true), true },
+ { MatMulKernelInfo(true, false, 2, 8, 8, true), true },
+ { MatMulKernelInfo(true, false, 2, 16, 8, true), true },
+
+ // Lhs transposed, Rhs-transposed
+        { MatMulKernelInfo(true, true, 2, 1, 5), false },  // K0 should be in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(true, true, 1, 8, 7), false },  // K0 should be in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, true, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, true, 6, 3, 12), false }, // M0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, true, 5, 3, 6), false }, // M0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, true, 4, 8, 16), true },
+ { MatMulKernelInfo(true, true, 3, 3, 4), true },
+ { MatMulKernelInfo(true, true, 16, 4, 8), true },
+ { MatMulKernelInfo(true, true, 2, 2, 1, true), false }, // K0 not in {4, 8, 16}
+ { MatMulKernelInfo(true, true, 2, 2, 5, true), false }, // K0 not in {4, 8, 16}
+ { MatMulKernelInfo(true, true, 2, 4, 7, true), false }, // K0 not in {4, 8, 16}
+ { MatMulKernelInfo(true, true, 2, 4, 4, true), true },
+ { MatMulKernelInfo(true, true, 2, 8, 8, true), true },
+ { MatMulKernelInfo(true, true, 2, 8, 16, true), true },
+ };
+
+ // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal
+ // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here,
+ // not the shapes themselves.
+ const TensorInfo lhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32);
+ const TensorInfo rhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32);
+
+ const bool export_to_cl_image_supported = image2d_from_buffer_supported(CLKernelLibrary::get().get_device());
+ for(auto &pair : supported_block_sizes)
+ {
+ TensorInfo output_info;
+ Status status = ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, pair.first);
+
+ if(!pair.first.export_rhs_to_cl_image || export_to_cl_image_supported)
+ {
+ ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS);
+ }
+ }
+}
+
+TEST_CASE(ExportToCLImage, framework::DatasetMode::ALL)
+{
+ // We skip this test if the hardware does not support exporting to CL Image
+ if(image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
+ {
+ constexpr size_t pixel_size = 4;
+ const size_t max_image_w = pixel_size * CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
+ const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
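+        // Note: each cl_image pixel packs four FP32 elements (a float4), hence pixel_size = 4: the maximum
+        // tensor row length in elements is four times the device's maximum image width in pixels, and the
+        // exported Rhs dimension in the cases below must be a multiple of 4.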
+
+ using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, bool, bool, bool>;
+ const std::vector<ShapeConfigurationTuple> shape_configurations =
+ {
+ // lhs_shape, rhs_shape, adj_lhs, adj_rhs, expected
+ // Lhs t/Nt, Rhs Nt
+            // Transposition of Lhs doesn't add any value to these tests, therefore it is always assumed to be false below
+ { TensorShape(5U, 1U), TensorShape(3U, 5U), false, false, false }, // N should be multiple of 4
+ { TensorShape(5U, 1U), TensorShape(14U, 5U), false, false, false }, // N should be multiple of 4
+ { TensorShape(5U, 1U), TensorShape(12U, 5U), false, false, true },
+ { TensorShape(5U, 1U), TensorShape(8U, 5U), false, false, true },
+ { TensorShape(5U, 1U), TensorShape(4U, 5U), false, false, true },
+ { TensorShape(max_image_h + 1, 1U), TensorShape(4U, max_image_h + 1), false, false, false }, // Cannot fit into CL Image memory's height
+ { TensorShape(5U, 1U), TensorShape(max_image_w + 1, 5U), false, false, false }, // Cannot fit into CL Image memory's width
+ { TensorShape(max_image_h, 1U), TensorShape(4U, max_image_h), false, false, true }, // Barely fits into CL Image memory's height
+ { TensorShape(5U, 1U), TensorShape(max_image_w, 5U), false, false, true }, // Barely fits into CL Image memory's width
+
+ // Lhs Nt/T , Rhs T
+ { TensorShape(5U, 1U), TensorShape(5U, 3U), false, true, false }, // K should be multiple of 4
+ { TensorShape(5U, 1U), TensorShape(5U, 14U), false, true, false }, // K should be multiple of 4
+ { TensorShape(4U, 1U), TensorShape(4U, 10U), false, true, true },
+ { TensorShape(8U, 1U), TensorShape(8U, 9U), false, true, true },
+ { TensorShape(12U, 1U), TensorShape(12U, 6U), false, true, true },
+ };
+
+ for(auto &tuple : shape_configurations)
+ {
+ TensorShape lhs_shape = std::get<0>(tuple);
+ TensorShape rhs_shape = std::get<1>(tuple);
+
+ const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::F32);
+ const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::F32);
+
+ const bool adj_lhs = std::get<2>(tuple);
+ const bool adj_rhs = std::get<3>(tuple);
+
+ // We choose M0, N0, K0 equal to 4 so that they're always valid for CLImage in any combination
+ const MatMulKernelInfo matmul_kernel_info
+ {
+ adj_lhs, adj_rhs, 4, 4, 4, true /* export_rhs_to_cl_image */
+ };
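+            // As covered in SupportedBlockSizes above, the export path requires N0 in {4, 8, 16} when Rhs is
+            // not transposed and K0 in {4, 8, 16} when it is, so 4 is valid for every combination tested here.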
+
+ TensorInfo output_info;
+ Status status = ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, matmul_kernel_info);
+
+ const bool expected = std::get<4>(tuple);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ }
+ }
+}
+
+TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL)
+{
+ // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
+ using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, TensorShape, bool>;
+ const std::vector<ShapeConfigurationTuple> shape_configurations =
+ {
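+        // lhs_shape, rhs_shape, bias_shape, expected
+        // In the Nt/Nt convention assumed here, Lhs is laid out as (K, M) and Rhs as (N, K)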
+ { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(3U), true },
+ { TensorShape(10U, 12U), TensorShape(3U, 10U), TensorShape(3U), true },
+ { TensorShape(8U, 4U), TensorShape(2U, 8U), TensorShape(2U), true },
+ { TensorShape(8U, 4U), TensorShape(2U, 5U), TensorShape(2U), false }, // Mismatch in the K dimension
+ { TensorShape(5U, 0U), TensorShape(2U, 5U), TensorShape(2U), false }, // Invalid dimension
+ { TensorShape(5U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), true },
+ { TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // no batch broadcasting
+ { TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // mismatch in batch dimension
+ { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(1U), false }, // Unsupported bias broadcasting.
+ { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(3U, 3U), false }, // 2D bias is unsupported.
+ { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(6U), false }, // bias first dimension != dst first dimension
+ };
+
+ for(auto &tuple : shape_configurations)
+ {
+ const bool expected = std::get<3>(tuple);
+
+ for(bool adj_lhs :
+ {
+ false, true
+ })
+ {
+ for(bool adj_rhs :
+ {
+ false, true
+ })
+ {
+ TensorShape lhs_shape = std::get<0>(tuple);
+ TensorShape rhs_shape = std::get<1>(tuple);
+ TensorShape bia_shape = std::get<2>(tuple);
+
+ if(adj_lhs)
+ {
+ permute(lhs_shape, PermutationVector(1U, 0U));
+ }
+
+ if(adj_rhs)
+ {
+ permute(rhs_shape, PermutationVector(1U, 0U));
+ }
+
+ const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::F32);
+ const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::F32);
+ const TensorInfo bia_info = TensorInfo(bia_shape, 1, DataType::F32);
+ TensorInfo output_info;
+
+ MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 1, false /* export_rhs_to_cl_image */ };
+
+ Status status = ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ }
+ }
+ }
+}
+
+TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL)
+{
+    // It's enough to test a single Nt/Nt configuration and shape while checking data types
+ using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, bool>;
+ const std::vector<DataTypeConfigurationTuple> data_type_configurations =
+ {
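+        // lhs, rhs, dst, expected (bias is passed as nullptr in this test)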
+ { DataType::F32, DataType::F32, DataType::F32, true },
+ { DataType::F16, DataType::F16, DataType::F16, true },
+ { DataType::F16, DataType::F32, DataType::F32, false }, // no mixed precision
+ { DataType::F64, DataType::F64, DataType::F64, false }, // no double precision
+ { DataType::QASYMM8, DataType::QASYMM8, DataType::QASYMM8, false }, // no quantized types
+ { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, false }, // no quantized types
+ { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, false }, // no quantized types
+ { DataType::QASYMM16, DataType::QASYMM16, DataType::QASYMM16, false }, // no quantized types
+ { DataType::QSYMM16, DataType::QSYMM16, DataType::QSYMM16, false }, // no quantized types
+ { DataType::QSYMM8, DataType::QSYMM8, DataType::QSYMM8, false }, // no quantized types
+ { DataType::S64, DataType::S64, DataType::S64, false }, // no integral types
+ { DataType::S32, DataType::S32, DataType::S32, false }, // no integral types
+ { DataType::S16, DataType::S16, DataType::S16, false }, // no integral types
+ { DataType::S8, DataType::S8, DataType::S8, false }, // no integral types
+ { DataType::U64, DataType::U64, DataType::U64, false }, // no integral types
+ { DataType::U32, DataType::U32, DataType::U32, false }, // no integral types
+ { DataType::U16, DataType::U16, DataType::U16, false }, // no integral types
+ { DataType::U8, DataType::U8, DataType::U8, false }, // no integral types
+ };
+
+ const TensorShape shape = TensorShape(10U, 10U);
+ const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 1, false };
+ for(auto &tuple : data_type_configurations)
+ {
+ const bool expected = std::get<3>(tuple);
+
+ const TensorInfo lhs_info(shape, 1, std::get<0>(tuple));
+ const TensorInfo rhs_info(shape, 1, std::get<1>(tuple));
+ TensorInfo output_info(shape, 1, std::get<2>(tuple));
+
+ Status status = ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, matmul_kernel_info);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ }
+}
+
+TEST_SUITE_END() // Validate
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+TEST_SUITE(Buffer)
+FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulKernelBiasFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { false })),
+ m0_values_nightly_lhs_nt),
+ n0_values_nightly_rhs_nt),
+ k0_values_nightly_lhs_nt_rhs_nt),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { true })),
+ m0_values_nightly_lhs_nt),
+ n0_values_nightly_rhs_t),
+ k0_values_nightly_rhs_t),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { true })),
+ framework::dataset::make("TransposeB", { false })),
+ m0_values_nightly_lhs_t),
+ n0_values_nightly_rhs_nt),
+ k0_values_nightly_lhs_t_rhs_nt),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { true })),
+ framework::dataset::make("TransposeB", { true })),
+ m0_values_nightly_lhs_t),
+ n0_values_nightly_rhs_t),
+ k0_values_nightly_rhs_t),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+// Running the high-dimensional test for FP32 only is enough, because we're stressing the number of dimensions, not the data type or M0/N0/K0
+// It's a good idea to test each Lhs/Rhs T/NT combination because they map to different CL kernels
+FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ framework::dataset::make("M0", { 2 })),
+ framework::dataset::make("N0", { 2 })),
+ framework::dataset::make("K0", { 2 })),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+TEST_SUITE_END() // Buffer
+
+TEST_SUITE(ExportRhsToCLImage)
+FIXTURE_DATA_TEST_CASE(RunSmallRhsNotTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsNT(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { false })),
+ framework::dataset::make("M0", { 2 })),
+ framework::dataset::make("N0", { 4, 8, 16 })),
+ framework::dataset::make("K0", { 2, 4 })),
+ framework::dataset::make("ExportRhsToCLImage", { true })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ if(_device_supports_export_to_cl_image)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsNotTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsNT(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { false })),
+                                                       framework::dataset::make("M0", { 2 })), // The choice of M0 does not matter much because it relates to the Lhs tensor
+ framework::dataset::make("N0", { 4, 8, 16 })),
+ framework::dataset::make("K0", { 1, 2, 3, 4 })),
+ framework::dataset::make("ExportRhsToCLImage", { true })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ if(_device_supports_export_to_cl_image)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunSmallRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsT(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { true })),
+ framework::dataset::make("M0", { 2 })),
+ framework::dataset::make("N0", { 2, 4 })),
+ framework::dataset::make("K0", { 4, 8, 16 })),
+ framework::dataset::make("ExportRhsToCLImage", { true })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ if(_device_supports_export_to_cl_image)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsT(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { true })),
+                                                       framework::dataset::make("M0", { 2 })), // The choice of M0 does not matter much because it relates to the Lhs tensor
+ framework::dataset::make("N0", { 1, 2, 3, 4 })),
+ framework::dataset::make("K0", { 4, 8, 16 })),
+ framework::dataset::make("ExportRhsToCLImage", { true })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ if(_device_supports_export_to_cl_image)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+}
+TEST_SUITE_END() // ExportRhsToCLImage
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+TEST_SUITE(Buffer)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { false })),
+ m0_values_nightly_lhs_nt),
+ n0_values_nightly_rhs_nt),
+ k0_values_nightly_lhs_nt_rhs_nt),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { true })),
+ m0_values_nightly_lhs_nt),
+ n0_values_nightly_rhs_t),
+ k0_values_nightly_rhs_t),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { true })),
+ framework::dataset::make("TransposeB", { false })),
+ m0_values_nightly_lhs_t),
+ n0_values_nightly_rhs_nt),
+ k0_values_nightly_lhs_t_rhs_nt),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { true })),
+ framework::dataset::make("TransposeB", { true })),
+ m0_values_nightly_lhs_t),
+ n0_values_nightly_rhs_t),
+ k0_values_nightly_rhs_t),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+TEST_SUITE_END() // Buffer
+
+TEST_SUITE(ExportRhsToCLImage)
+FIXTURE_DATA_TEST_CASE(RunSmallRhsNotTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsNT(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { false })),
+ framework::dataset::make("M0", { 2 })),
+ framework::dataset::make("N0", { 4, 8, 16 })),
+ framework::dataset::make("K0", { 2, 4 })),
+ framework::dataset::make("ExportRhsToCLImage", { true })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ if(_device_supports_export_to_cl_image)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsNotTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsNT(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { false })),
+                                                       framework::dataset::make("M0", { 2 })), // The choice of M0 does not matter much because it relates to the Lhs tensor
+ framework::dataset::make("N0", { 4, 8, 16 })),
+ framework::dataset::make("K0", { 1, 2, 3, 4 })),
+ framework::dataset::make("ExportRhsToCLImage", { true })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ if(_device_supports_export_to_cl_image)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunSmallRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsT(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { true })),
+ framework::dataset::make("M0", { 2 })),
+ framework::dataset::make("N0", { 2, 4 })),
+ framework::dataset::make("K0", { 4, 8, 16 })),
+ framework::dataset::make("ExportRhsToCLImage", { true })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ if(_device_supports_export_to_cl_image)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsT(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { true })),
+                                                       framework::dataset::make("M0", { 2 })), // The choice of M0 does not matter much because it relates to the Lhs tensor
+ framework::dataset::make("N0", { 1, 2, 3, 4 })),
+ framework::dataset::make("K0", { 4, 8, 16 })),
+ framework::dataset::make("ExportRhsToCLImage", { true })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ if(_device_supports_export_to_cl_image)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+}
+TEST_SUITE_END() // ExportRhsToCLImage
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // MatMulKernel
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/MatMulLowpNativeKernel.cpp b/tests/validation/CL/MatMulLowpNativeKernel.cpp
new file mode 100644
index 0000000000..90eee4fb82
--- /dev/null
+++ b/tests/validation/CL/MatMulLowpNativeKernel.cpp
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "src/gpu/cl/kernels/ClMatMulLowpNativeKernel.h"
+
+#include "tests/datasets/LargeMatMulDataset.h"
+#include "tests/datasets/SmallMatMulDataset.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/MatMulKernelFixture.h"
+#include "tests/validation/reference/Permute.h"
+
+#include <tuple>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+} // namespace
+template <typename T>
+using CLMatMulLowpNativeKernelFixture = MatMulKernelValidationFixture<T, ClMatMulLowpNativeKernel>;
+
+template <typename T>
+using CLMatMulLowpKernelWithBiasFixture = MatMulKernelWithBiasValidation<T, ClMatMulLowpNativeKernel>;
+
+/** M0 values to test --precommit*/
+const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 });
+
+/** N0 values to test --precommit*/
+const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 });
+
+/** K0 values to test --precommit*/
+const auto k0_values_precommit = framework::dataset::make("K0", { 2, 3 });
+
+/** M0 values to test --nightly*/
+const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 1, 2, 3, 4, 5, 6, 7, 8 });
+const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", { 1, 2, 3, 4, 8 });
+
+/** N0 values to test --nightly*/
+const auto n0_values_nightly_rhs_nt = framework::dataset::make("N0", { 1, 2, 3, 4, 8, 16 });
+const auto n0_values_nightly_rhs_t = framework::dataset::make("N0", { 1, 2, 3, 4, 8 });
+
+/** K0 values to test --nightly*/
+const auto k0_values_nightly_lhs_nt_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 8, 16 });
+const auto k0_values_nightly_rhs_t = framework::dataset::make("K0", { 1, 2, 3, 4, 8 });
+const auto k0_values_nightly_lhs_t_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 5, 6, 7, 8 });
+
+TEST_SUITE(CL)
+TEST_SUITE(MatMulLowpNativeKernel)
+TEST_SUITE(Validate)
+
+TEST_CASE(SupportedKernelConfigurations, framework::DatasetMode::ALL)
+{
+ using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>;
+
+ const std::vector<MatMulConfigurationPair> supported_block_sizes =
+ {
+ // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false)
+ // Lhs not-transposed, Rhs-not-transposed
+ { MatMulKernelInfo(false, false, 0, 1, 1), false }, // M0 should be > 0
+ { MatMulKernelInfo(false, false, 3, 5, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, false, 3, 6, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, false, 3, 3, 17), false }, // K0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, false, 3, 3, 7), false }, // K0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, false, 9, 1, 2), true },
+ { MatMulKernelInfo(false, false, 3, 16, 3), true },
+ { MatMulKernelInfo(false, false, 7, 3, 4), true },
+ { MatMulKernelInfo(false, false, 7, 3, 4, true), true }, // export to CLImage is unsupported for quantized types
+ };
+
+ // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal
+ // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here,
+ // not the shapes themselves.
+ const TensorInfo lhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::QASYMM8_SIGNED);
+ const TensorInfo rhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::QASYMM8_SIGNED);
+
+ for(auto &pair : supported_block_sizes)
+ {
+ TensorInfo output_info;
+ Status status = ClMatMulLowpNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, pair.first);
+
+ ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS);
+ }
+}
+
+TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL)
+{
+ // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
+ using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, TensorShape, bool>;
+ const std::vector<ShapeConfigurationTuple> shape_configurations =
+ {
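+        // lhs_shape, rhs_shape, bias_shape, expected (Nt/Nt convention, as in MatMulKernel.cpp)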
+ { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(3U), true },
+ { TensorShape(10U, 12U), TensorShape(3U, 10U), TensorShape(3U), true },
+ { TensorShape(8U, 4U), TensorShape(2U, 8U), TensorShape(2U), true },
+ { TensorShape(8U, 4U), TensorShape(2U, 5U), TensorShape(2U), false }, // Mismatch in the K dimension
+ { TensorShape(5U, 0U), TensorShape(2U, 5U), TensorShape(2U), false }, // Invalid dimension
+ { TensorShape(5U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), true },
+ { TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // no batch broadcasting
+ { TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // mismatch in batch dimension
+ { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(1U), false }, // invalid broadcast of bias
+ { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(3U, 3U), false }, // 2d bias is invalid
+ };
+
+ for(auto &tuple : shape_configurations)
+ {
+ const bool expected = std::get<3>(tuple);
+
+ for(bool adj_lhs :
+ {
+ false, true
+ })
+ {
+ for(bool adj_rhs :
+ {
+ false, true
+ })
+ {
+ TensorShape lhs_shape = std::get<0>(tuple);
+ TensorShape rhs_shape = std::get<1>(tuple);
+ TensorShape bia_shape = std::get<2>(tuple);
+
+ if(adj_lhs)
+ {
+ permute(lhs_shape, PermutationVector(1U, 0U));
+ }
+
+ if(adj_rhs)
+ {
+ permute(rhs_shape, PermutationVector(1U, 0U));
+ }
+
+ const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::QASYMM8_SIGNED);
+ const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::QASYMM8_SIGNED);
+ const TensorInfo bia_info = TensorInfo(bia_shape, 1, DataType::S32);
+ TensorInfo output_info;
+
+ MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 1, false /* export_rhs_to_cl_image */ };
+
+ Status status = ClMatMulLowpNativeKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ }
+ }
+ }
+}
+
+TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL)
+{
+ using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, DataType, bool>;
+ const std::vector<DataTypeConfigurationTuple> data_type_configurations =
+ {
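+        // lhs, rhs, bias, dst, expected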
+ { DataType::F32, DataType::F32, DataType::F32, DataType::F32, false }, // no floating point types
+ { DataType::F16, DataType::F16, DataType::F16, DataType::F16, false }, // no floating point types
+ { DataType::F64, DataType::F64, DataType::F64, DataType::F64, false }, // no double precision
+ { DataType::QASYMM8, DataType::QASYMM8, DataType::S32, DataType::QASYMM8, true },
+ { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, true },
+ { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::S32, DataType::QSYMM8_PER_CHANNEL, false }, // only qasymm8/qasymm8_signed is supported
+ { DataType::QASYMM16, DataType::QASYMM16, DataType::S32, DataType::QASYMM16, false }, // only qasymm8/qasymm8_signed is supported
+ { DataType::QSYMM16, DataType::QSYMM16, DataType::S32, DataType::QSYMM16, false }, // only qasymm8/qasymm8_signed is supported
+ { DataType::QSYMM8, DataType::QSYMM8, DataType::S32, DataType::QSYMM8, false }, // only qasymm8/qasymm8_signed is supported
+ { DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8, false }, // no mixed data types
+ { DataType::S64, DataType::S64, DataType::S64, DataType::S64, false }, // no integral types
+ { DataType::S32, DataType::S32, DataType::S32, DataType::S32, false }, // no integral types
+ { DataType::S16, DataType::S16, DataType::S16, DataType::S16, false }, // no integral types
+ { DataType::S8, DataType::S8, DataType::S8, DataType::S8, false }, // no integral types
+ { DataType::U64, DataType::U64, DataType::U64, DataType::U64, false }, // no integral types
+ { DataType::U32, DataType::U32, DataType::U32, DataType::U32, false }, // no integral types
+ { DataType::U16, DataType::U16, DataType::U16, DataType::U16, false }, // no integral types
+ { DataType::U8, DataType::U8, DataType::U8, DataType::U8, false }, // no integral types
+ { DataType::QASYMM8, DataType::QASYMM8, DataType::F32, DataType::QASYMM8, false } // Only S32 bias is supported
+ };
+
+ // It's enough to test a single shape and block size configuration while checking data types
+ const TensorShape shape = TensorShape(10U, 10U);
+ const TensorShape bia_shape = TensorShape(10U);
+ const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 1, false };
+ for(auto &tuple : data_type_configurations)
+ {
+ const bool expected = std::get<4>(tuple);
+
+ const TensorInfo lhs_info(shape, 1, std::get<0>(tuple));
+ const TensorInfo rhs_info(shape, 1, std::get<1>(tuple));
+ const TensorInfo bia_info(bia_shape, 1, std::get<2>(tuple));
+ TensorInfo output_info(shape, 1, std::get<3>(tuple));
+
+ Status status = ClMatMulLowpNativeKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ }
+}
+
+TEST_SUITE_END() // Validate
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { true, false })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { true, false })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulLowpKernelWithBiasFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { true, false })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { false })),
+ m0_values_nightly_lhs_nt),
+ n0_values_nightly_rhs_nt),
+ k0_values_nightly_lhs_nt_rhs_nt),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { true })),
+ m0_values_nightly_lhs_nt),
+ n0_values_nightly_rhs_t),
+ k0_values_nightly_rhs_t),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { true })),
+ framework::dataset::make("TransposeB", { false })),
+ m0_values_nightly_lhs_t),
+ n0_values_nightly_rhs_nt),
+ k0_values_nightly_lhs_t_rhs_nt),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { true })),
+ framework::dataset::make("TransposeB", { true })),
+ m0_values_nightly_lhs_t),
+ n0_values_nightly_rhs_t),
+ k0_values_nightly_rhs_t),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+// Running the High Dimensional test only for qasymm8_signed is enough, because it stresses the number of dimensions rather than the data type or M0/N0/K0
+// It is still worth testing each Lhs/Rhs T/NT combination, because each one is a different CL kernel
+FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulDataset(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { true, false })),
+ framework::dataset::make("M0", { 2 })),
+ framework::dataset::make("N0", { 2 })),
+ framework::dataset::make("K0", { 2 })),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { true, false })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+ framework::dataset::make("TransposeA", { true, false })),
+ framework::dataset::make("TransposeB", { true, false })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_values_precommit),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { false })),
+ m0_values_nightly_lhs_nt),
+ n0_values_nightly_rhs_nt),
+ k0_values_nightly_lhs_nt_rhs_nt),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { true })),
+ m0_values_nightly_lhs_nt),
+ n0_values_nightly_rhs_t),
+ k0_values_nightly_rhs_t),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { true })),
+ framework::dataset::make("TransposeB", { false })),
+ m0_values_nightly_lhs_t),
+ n0_values_nightly_rhs_nt),
+ k0_values_nightly_lhs_t_rhs_nt),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+ framework::dataset::make("TransposeA", { true })),
+ framework::dataset::make("TransposeB", { true })),
+ m0_values_nightly_lhs_t),
+ n0_values_nightly_rhs_t),
+ k0_values_nightly_rhs_t),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+TEST_SUITE_END() // MatMulLowpNativeKernel
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/MatMulLowpNativeMMULKernel.cpp b/tests/validation/CL/MatMulLowpNativeMMULKernel.cpp
new file mode 100644
index 0000000000..ac46b67c9e
--- /dev/null
+++ b/tests/validation/CL/MatMulLowpNativeMMULKernel.cpp
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "src/gpu/cl/kernels/ClMatMulLowpNativeMMULKernel.h"
+
+#include "tests/datasets/MatMulLowpMMULDataset.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/MatMulKernelFixture.h"
+#include "tests/validation/reference/Permute.h"
+
+#include <tuple>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+} // namespace
+using framework::dataset::make;
+
+template <typename T>
+using CLMatMulLowpNativeMMULKernelFixture = MatMulKernelValidationFixture<T, ClMatMulLowpNativeMMULKernel, true /* use_mmul */>;
+
+template <typename T>
+using CLMatMulLowpNativeMMULKernelWithBiasFixture = MatMulKernelWithBiasValidation<T, ClMatMulLowpNativeMMULKernel, true /* use_mmul */>;
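+// Note: use_mmul = true; the quantized test cases below check _device_supports_mmul before validating,
+// so they are skipped gracefully on devices without the cl_arm_matrix_multiply extension.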
+
+/** M0 values to test --precommit*/
+const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 });
+
+/** N0 values to test --precommit*/
+const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 });
+
+/** M0 values to test --nightly*/
+const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 2, 4, 5, 8 });
+const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", { 2, 4, 8 });
+
+/** N0 values to test --nightly*/
+const auto n0_values_nightly = framework::dataset::make("N0", { 1, 3, 8, 16 });
+
+TEST_SUITE(CL)
+TEST_SUITE(MatMulLowpNativeMMULKernel)
+TEST_SUITE(Validate)
+
+TEST_CASE(SupportedKernelConfigurations, framework::DatasetMode::ALL)
+{
+ using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>;
+
+ const std::vector<MatMulConfigurationPair> supported_block_sizes =
+ {
+ // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false)
+ { MatMulKernelInfo(false, false, 0, 1, 4), false }, // M0 should be > 0
+ { MatMulKernelInfo(false, true, 3, 5, 4), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, false, 3, 6, 4), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, false, 3, 3, 8), false }, // K0 is not 4
+ { MatMulKernelInfo(true, false, 5, 3, 4), false }, // M0 not in {1, 2, 3, 4, 8, 16} when Lhs is transposed
+ { MatMulKernelInfo(false, false, 9, 1, 4), true },
+ { MatMulKernelInfo(false, true, 3, 16, 4), true },
+ { MatMulKernelInfo(false, false, 7, 3, 4), true },
+ { MatMulKernelInfo(true, false, 8, 3, 4), true },
+ { MatMulKernelInfo(true, true, 4, 3, 4), true },
+ { MatMulKernelInfo(false, false, 7, 3, 4, true), false }, // export to CLImage is unsupported for quantized types
+ };
+
+ // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal
+ // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here,
+ // not the shapes themselves.
+ const TensorInfo lhs_info = TensorInfo(TensorShape(64U, 64U), 1, DataType::QASYMM8_SIGNED);
+ const TensorInfo rhs_info = TensorInfo(TensorShape(64U, 64U), 1, DataType::QASYMM8_SIGNED);
+
+ for(auto &pair : supported_block_sizes)
+ {
+ TensorInfo output_info;
+ Status status = ClMatMulLowpNativeMMULKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, pair.first);
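+ // A configuration is only expected to pass validation when the block sizes are legal and the device also reports the cl_arm_matrix_multiply extension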
+ const bool expected = (pair.second && arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()));
+
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ }
+}
+
+TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL)
+{
+ // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
+ using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, TensorShape, bool>;
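+ // Tuple layout: lhs shape, rhs shape, bias shape, expected validation result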
+ const std::vector<ShapeConfigurationTuple> shape_configurations =
+ {
+ { TensorShape(32U, 1U), TensorShape(3U, 32U), TensorShape(3U), true },
+ { TensorShape(16U, 12U), TensorShape(3U, 16U), TensorShape(3U), true },
+ { TensorShape(64U, 4U), TensorShape(2U, 64U), TensorShape(2U), true },
+ { TensorShape(16U, 4U), TensorShape(2U, 32U), TensorShape(2U), false }, // Mismatch in the K dimension
+ { TensorShape(16U, 0U), TensorShape(2U, 16U), TensorShape(2U), false }, // Invalid dimension
+ { TensorShape(32U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 32U, 3U, 4U, 5U, 6U), TensorShape(2U), true },
+ { TensorShape(32U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 32U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // no batch broadcasting
+ { TensorShape(32U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 32U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // mismatch in batch dimension
+ { TensorShape(32U, 1U), TensorShape(3U, 32U), TensorShape(1U), false }, // invalid broadcast of bias
+ { TensorShape(32U, 1U), TensorShape(3U, 32U), TensorShape(3U, 3U), false }, // 2d bias is invalid
+ { TensorShape(12U, 12U), TensorShape(3U, 12U), TensorShape(3U), false }, // K must be multiple of 16
+ };
+
+ for(auto &tuple : shape_configurations)
+ {
+ const bool expected = (std::get<3>(tuple) && arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()));
+
+ for(bool adj_lhs :
+ {
+ false, true
+ })
+ {
+ for(bool adj_rhs :
+ {
+ false, true
+ })
+ {
+ TensorShape lhs_shape = std::get<0>(tuple);
+ TensorShape rhs_shape = std::get<1>(tuple);
+ TensorShape bia_shape = std::get<2>(tuple);
+
+ if(adj_lhs)
+ {
+ permute(lhs_shape, PermutationVector(1U, 0U));
+ }
+
+ if(adj_rhs)
+ {
+ permute(rhs_shape, PermutationVector(1U, 0U));
+ }
+
+ const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::QASYMM8_SIGNED);
+ const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::QASYMM8_SIGNED);
+ const TensorInfo bia_info = TensorInfo(bia_shape, 1, DataType::S32);
+ TensorInfo output_info;
+
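+ // Minimal M0/N0 with K0 = 4 (the only K0 accepted by this kernel, see SupportedKernelConfigurations) so that only the shapes are exercised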
+ MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 4, false /* export_rhs_to_cl_image */ };
+
+ Status status = ClMatMulLowpNativeMMULKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ }
+ }
+ }
+}
+
+TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL)
+{
+ using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, DataType, bool>;
+ const std::vector<DataTypeConfigurationTuple> data_type_configurations =
+ {
+ { DataType::F32, DataType::F32, DataType::F32, DataType::F32, false }, // no floating point types
+ { DataType::F16, DataType::F16, DataType::F16, DataType::F16, false }, // no floating point types
+ { DataType::F64, DataType::F64, DataType::F64, DataType::F64, false }, // no double precision
+ { DataType::QASYMM8, DataType::QASYMM8, DataType::S32, DataType::QASYMM8, true },
+ { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, true },
+ { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::S32, DataType::QSYMM8_PER_CHANNEL, false }, // only qasymm8/qasymm8_signed is supported
+ { DataType::QASYMM16, DataType::QASYMM16, DataType::S32, DataType::QASYMM16, false }, // only qasymm8/qasymm8_signed is supported
+ { DataType::QSYMM16, DataType::QSYMM16, DataType::S32, DataType::QSYMM16, false }, // only qasymm8/qasymm8_signed is supported
+ { DataType::QSYMM8, DataType::QSYMM8, DataType::S32, DataType::QSYMM8, false }, // only qasymm8/qasymm8_signed is supported
+ { DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8, false }, // no mixed data types
+ { DataType::S64, DataType::S64, DataType::S64, DataType::S64, false }, // no integral types
+ { DataType::S32, DataType::S32, DataType::S32, DataType::S32, false }, // no integral types
+ { DataType::S16, DataType::S16, DataType::S16, DataType::S16, false }, // no integral types
+ { DataType::S8, DataType::S8, DataType::S8, DataType::S8, false }, // no integral types
+ { DataType::U64, DataType::U64, DataType::U64, DataType::U64, false }, // no integral types
+ { DataType::U32, DataType::U32, DataType::U32, DataType::U32, false }, // no integral types
+ { DataType::U16, DataType::U16, DataType::U16, DataType::U16, false }, // no integral types
+ { DataType::U8, DataType::U8, DataType::U8, DataType::U8, false }, // no integral types
+ { DataType::QASYMM8, DataType::QASYMM8, DataType::F32, DataType::QASYMM8, false } // Only S32 bias is supported
+ };
+
+ // It's enough to test a single shape and block size configuration while checking data types
+ const TensorShape shape = TensorShape(48U, 48U);
+ const TensorShape bia_shape = TensorShape(48U);
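+ // 48 keeps the K dimension a multiple of 16, which this kernel requires (see the shape checks above)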
+ const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 4, false };
+ for(auto &tuple : data_type_configurations)
+ {
+ const bool expected = (std::get<4>(tuple) && arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()));
+
+ const TensorInfo lhs_info(shape, 1, std::get<0>(tuple));
+ const TensorInfo rhs_info(shape, 1, std::get<1>(tuple));
+ const TensorInfo bia_info(bia_shape, 1, std::get<2>(tuple));
+ TensorInfo output_info(shape, 1, std::get<3>(tuple));
+
+ Status status = ClMatMulLowpNativeMMULKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ }
+}
+
+TEST_SUITE_END() // Validate
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8_SIGNED)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulLowpNativeMMULKernelFixture<int8_t>,
+ framework::DatasetMode::ALL,
+ combine(datasets::SmallMatMulLowpMMULDataset(),
+ make("TransposeA", { false, true }),
+ make("TransposeB", { false, true }),
+ m0_values_precommit,
+ n0_values_precommit,
+ make("K0", { 4 }),
+ make("ExportRhsToCLImage", { false }),
+ make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ if(_device_supports_mmul)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+ }
+}
+
+FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulLowpNativeMMULKernelWithBiasFixture<int8_t>,
+ framework::DatasetMode::ALL,
+ combine(datasets::SmallMatMulLowpMMULWithBiasDataset(),
+ make("TransposeA", { false, true }),
+ make("TransposeB", { false, true }),
+ m0_values_precommit,
+ n0_values_precommit,
+ make("K0", { 4 }),
+ make("ExportRhsToCLImage", { false }),
+ make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ if(_device_supports_mmul)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+ }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeLhsNotTransposed, CLMatMulLowpNativeMMULKernelFixture<int8_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeMatMulLowpMMULDataset(),
+ make("TransposeA", { false }),
+ make("TransposeB", { false, true }),
+ m0_values_nightly_lhs_nt,
+ n0_values_nightly,
+ make("K0", { 4 }),
+ make("ExportRhsToCLImage", { false }),
+ make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ if(_device_supports_mmul)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+ }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulLowpNativeMMULKernelFixture<int8_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeMatMulLowpMMULDataset(),
+ make("TransposeA", { true }),
+ make("TransposeB", { false, true }),
+ m0_values_nightly_lhs_t,
+ n0_values_nightly,
+ make("K0", { 4 }),
+ make("ExportRhsToCLImage", { false }),
+ make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ if(_device_supports_mmul)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+ }
+}
+
+// Running the High Dimensional test only for qasymm8_signed is enough, because it stresses the number of dimensions rather than the data type or M0/N0/K0
+// It is still worth testing each Lhs/Rhs T/NT combination, because each one is a different CL kernel
+FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulLowpNativeMMULKernelFixture<int8_t>,
+ framework::DatasetMode::ALL,
+ combine(datasets::HighDimensionalMatMulLowpMMULDataset(),
+ make("TransposeA", { false, true }),
+ make("TransposeB", { false, true }),
+ make("M0", { 2 }),
+ make("N0", { 2 }),
+ make("K0", { 4 }),
+ make("ExportRhsToCLImage", { false }),
+ make("DataType", DataType::QASYMM8_SIGNED)))
+{
+ if(_device_supports_mmul)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+ }
+}
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE(QASYMM8)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulLowpNativeMMULKernelFixture<uint8_t>,
+ framework::DatasetMode::ALL,
+ combine(datasets::SmallMatMulLowpMMULDatasetSubset(),
+ make("TransposeA", { false, true }),
+ make("TransposeB", { false, true }),
+ m0_values_precommit,
+ n0_values_precommit,
+ make("K0", { 4 }),
+ make("ExportRhsToCLImage", { false }),
+ make("DataType", DataType::QASYMM8)))
+{
+ if(_device_supports_mmul)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+ }
+}
+
+FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulLowpNativeMMULKernelWithBiasFixture<uint8_t>,
+ framework::DatasetMode::ALL,
+ combine(datasets::SmallMatMulLowpMMULWithBiasDataset(),
+ make("TransposeA", { false, true }),
+ make("TransposeB", { false, true }),
+ m0_values_precommit,
+ n0_values_precommit,
+ make("K0", { 4 }),
+ make("ExportRhsToCLImage", { false }),
+ make("DataType", DataType::QASYMM8)))
+{
+ if(_device_supports_mmul)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+ }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeLhsNotTransposed, CLMatMulLowpNativeMMULKernelFixture<uint8_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeMatMulLowpMMULDataset(),
+ make("TransposeA", { false }),
+ make("TransposeB", { false, true }),
+ m0_values_nightly_lhs_nt,
+ n0_values_nightly,
+ make("K0", { 4 }),
+ make("ExportRhsToCLImage", { false }),
+ make("DataType", DataType::QASYMM8)))
+{
+ if(_device_supports_mmul)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+ }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulLowpNativeMMULKernelFixture<uint8_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeMatMulLowpMMULDataset(),
+ make("TransposeA", { true }),
+ make("TransposeB", { false, true }),
+ m0_values_nightly_lhs_t,
+ n0_values_nightly,
+ make("K0", { 4 }),
+ make("ExportRhsToCLImage", { false }),
+ make("DataType", DataType::QASYMM8)))
+{
+ if(_device_supports_mmul)
+ {
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_quant);
+ }
+}
+
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+TEST_SUITE_END() // MatMulLowpNativeMMULKernel
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/MatMulNativeMMULKernel.cpp b/tests/validation/CL/MatMulNativeMMULKernel.cpp
new file mode 100644
index 0000000000..655dd354dc
--- /dev/null
+++ b/tests/validation/CL/MatMulNativeMMULKernel.cpp
@@ -0,0 +1,501 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "src/gpu/cl/kernels/ClMatMulNativeMMULKernel.h"
+#include "tests/datasets/LargeMatMulMMULDataset.h"
+#include "tests/datasets/SmallMatMulMMULDataset.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/MatMulKernelFixture.h"
+#include "tests/validation/reference/Permute.h"
+
+#include <tuple>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+constexpr float abs_tolerance_f32(
+ 0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for floating point data types in case using relative tolerance fails because of small values */
+constexpr float abs_tolerance_f16(
+ 0.02f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16 data types in case using relative tolerance fails because of small values */
+RelativeTolerance<half_float::half> tolerance_f16(half(0.02)); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data types */
+} // namespace
+
+/** M0 values to test --precommit*/
+const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 });
+
+/** N0 values to test --precommit*/
+const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 });
+
+/** M0 values to test --nightly*/
+const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 1, 2, 3, 4, 5, 6, 7, 8 });
+const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", { 1, 2, 3, 4, 8 });
+
+/** N0 values to test --nightly*/
+const auto n0_values_nightly_rhs_nt = framework::dataset::make("N0", { 1, 2, 3, 4, 8, 16 });
+const auto n0_values_nightly_rhs_t = framework::dataset::make("N0", { 1, 2, 3, 4, 8 });
+
+/** K0 value -- Fixed to 1 */
+const auto k0_value = framework::dataset::make("K0", { 1 });
+
+template <typename T>
+using CLMatMulNativeMMULKernelFixture = MatMulKernelValidationFixture<T, ClMatMulNativeMMULKernel, true /*use_mmul*/>;
+
+template <typename T>
+using CLMatMulKernelBiasFixture = MatMulKernelWithBiasValidation<T, ClMatMulNativeMMULKernel, true /*use_mmul*/>;
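+// Both fixtures pass use_mmul = true; every test below checks arm_matrix_multiply_supported() or
+// _device_supports_mmul before validating, so the suite is skipped gracefully when MMUL is unavailable.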
+
+TEST_SUITE(CL)
+TEST_SUITE(MatMulNativeMMULKernel)
+TEST_SUITE(Validate)
+
+TEST_CASE(SupportedBlockSizes, framework::DatasetMode::ALL)
+{
+ if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+ {
+ using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>;
+
+ const std::vector<MatMulConfigurationPair> supported_block_sizes =
+ {
+ // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false)
+ // Lhs not-transposed, Rhs not-transposed
+ { MatMulKernelInfo(false, false, 0, 1, 1), false }, // M0 should be > 0
+ { MatMulKernelInfo(false, false, 3, 5, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, false, 3, 6, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(false, false, 3, 3, 4), false }, // K0 is not 1
+ { MatMulKernelInfo(false, false, 9, 1, 1), true },
+ { MatMulKernelInfo(false, false, 3, 16, 1), true },
+ { MatMulKernelInfo(false, false, 7, 3, 1), true },
+
+ // Lhs transposed, Rhs not-transposed
+ { MatMulKernelInfo(true, false, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, false, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, false, 6, 3, 1), false }, // M0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, false, 5, 3, 1), false }, // M0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, false, 2, 2, 2), false }, // K0 is not 1
+ { MatMulKernelInfo(true, false, 4, 1, 1), true },
+ { MatMulKernelInfo(true, false, 3, 3, 1), true },
+ { MatMulKernelInfo(true, false, 2, 4, 1), true },
+
+ // Lhs not-transposed, Rhs transposed
+ { MatMulKernelInfo(false, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8}
+ { MatMulKernelInfo(false, true, 2, 17, 1), false }, // N0 not in {1, 2, 3, 4, 8}
+ { MatMulKernelInfo(false, true, 4, 5, 1), false }, // N0 not in {1, 2, 3, 4, 8}
+ { MatMulKernelInfo(false, true, 4, 4, 7), false }, // K0 is not 1
+ { MatMulKernelInfo(false, true, 4, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8}
+ { MatMulKernelInfo(false, true, 3, 8, 1), true },
+ { MatMulKernelInfo(false, true, 8, 16, 1), true },
+ { MatMulKernelInfo(false, true, 2, 4, 1), true },
+
+ // Lhs transposed, Rhs transposed
+ { MatMulKernelInfo(true, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, true, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, true, 6, 3, 1), false }, // M0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, true, 5, 3, 1), false }, // M0 not in {1, 2, 3, 4, 8, 16}
+ { MatMulKernelInfo(true, true, 4, 8, 2), false }, // K0 is not 1
+ { MatMulKernelInfo(true, true, 4, 8, 1), true },
+ { MatMulKernelInfo(true, true, 3, 3, 1), true },
+ { MatMulKernelInfo(true, true, 16, 4, 1), true },
+ };
+
+ // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal
+ // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here,
+ // not the shapes themselves.
+ const TensorInfo lhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32);
+ const TensorInfo rhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32);
+
+ for(auto &pair : supported_block_sizes)
+ {
+ TensorInfo output_info;
+ Status status = ClMatMulNativeMMULKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, pair.first);
+ ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS);
+ }
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+
+TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL)
+{
+ if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+ {
+ // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
+ using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, TensorShape, bool>; // lhs, rhs, bias, result
+ const std::vector<ShapeConfigurationTuple> shape_configurations =
+ {
+ { TensorShape(4U, 1U), TensorShape(3U, 4U), TensorShape(3U), true },
+ { TensorShape(12U, 12U), TensorShape(3U, 12U), TensorShape(3U), true },
+ { TensorShape(8U, 4U), TensorShape(2U, 8U), TensorShape(2U), true },
+ { TensorShape(8U, 4U), TensorShape(2U, 4U), TensorShape(2U), false }, // Mismatch in the K dimension
+ { TensorShape(5U, 0U), TensorShape(2U, 5U), TensorShape(2U), false }, // Invalid dimension
+ { TensorShape(5U, 7U), TensorShape(2U, 5U), TensorShape(2U), false }, // K not a multiple of 4 (MMUL_K0)
+ { TensorShape(8U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 8U, 3U, 4U, 5U, 6U), TensorShape(2U), true },
+ { TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // No batch broadcasting
+ { TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // Mismatch in batch dimension
+ { TensorShape(4U, 1U), TensorShape(3U, 4U), TensorShape(1U), false }, // Bias first dimensions != dst first dimension.
+ { TensorShape(4U, 1U), TensorShape(3U, 4U), TensorShape(5U, 6U), false }, // Bias is 2d which is invalid.
+ };
+
+ for(auto &tuple : shape_configurations)
+ {
+ const bool expected = std::get<3>(tuple);
+
+ for(bool adj_lhs :
+ {
+ false, true
+ })
+ {
+ for(bool adj_rhs :
+ {
+ false, true
+ })
+ {
+ TensorShape lhs_shape = std::get<0>(tuple);
+ TensorShape rhs_shape = std::get<1>(tuple);
+ TensorShape bia_shape = std::get<2>(tuple);
+
+ if(adj_lhs)
+ {
+ permute(lhs_shape, PermutationVector(1U, 0U));
+ }
+
+ if(adj_rhs)
+ {
+ permute(rhs_shape, PermutationVector(1U, 0U));
+ }
+
+ const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::F32);
+ const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::F32);
+ const TensorInfo bia_info = TensorInfo(bia_shape, 1, DataType::F32);
+ TensorInfo output_info;
+
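+ // M0 = N0 = 1 and K0 = 1 (the only K0 this kernel accepts) so that only the shapes are being validated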
+ MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 1, false /* export_rhs_to_cl_image */ };
+
+ Status status = ClMatMulNativeMMULKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ }
+ }
+ }
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+
+TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL)
+{
+ if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+ {
+ // It's enough to test a single shape and block size configuration while checking data types
+ using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, DataType, bool>;
+ const std::vector<DataTypeConfigurationTuple> data_type_configurations =
+ {
+ { DataType::F32, DataType::F32, DataType::F32, DataType::F32, true },
+ { DataType::F16, DataType::F16, DataType::F16, DataType::F16, true },
+ { DataType::F32, DataType::F32, DataType::F32, DataType::F32, true },
+ { DataType::F32, DataType::F32, DataType::F16, DataType::F32, false }, // incorrect bias type
+ { DataType::F16, DataType::F32, DataType::F32, DataType::F32, false }, // no mixed precision
+ { DataType::F64, DataType::F64, DataType::F64, DataType::F64, false }, // no double precision
+ { DataType::QASYMM8, DataType::QASYMM8, DataType::S32, DataType::QASYMM8, false }, // no quantized types
+ { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, false }, // no quantized types
+ { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::S32, DataType::QSYMM8_PER_CHANNEL, false }, // no quantized types
+ { DataType::QASYMM16, DataType::QASYMM16, DataType::S32, DataType::QASYMM16, false }, // no quantized types
+ { DataType::QSYMM16, DataType::QSYMM16, DataType::S32, DataType::QSYMM16, false }, // no quantized types
+ { DataType::QSYMM8, DataType::QSYMM8, DataType::S32, DataType::QSYMM8, false }, // no quantized types
+ { DataType::S64, DataType::S64, DataType::S64, DataType::S64, false }, // no integral types
+ { DataType::S32, DataType::S32, DataType::S32, DataType::S32, false }, // no integral types
+ { DataType::S16, DataType::S16, DataType::S16, DataType::S16, false }, // no integral types
+ { DataType::S8, DataType::S8, DataType::S8, DataType::S8, false }, // no integral types
+ { DataType::U64, DataType::U64, DataType::U64, DataType::U64, false }, // no integral types
+ { DataType::U32, DataType::U32, DataType::U32, DataType::U32, false }, // no integral types
+ { DataType::U16, DataType::U16, DataType::U16, DataType::U16, false }, // no integral types
+ { DataType::U8, DataType::U8, DataType::U8, DataType::U8, false }, // no integral types
+ };
+
+ const TensorShape shape = TensorShape(8U, 8U);
+ const TensorShape bia_shape = TensorShape(8U);
+ const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 1, false };
+ for(auto &tuple : data_type_configurations)
+ {
+ const bool expected = std::get<4>(tuple);
+
+ const TensorInfo lhs_info(shape, 1, std::get<0>(tuple));
+ const TensorInfo rhs_info(shape, 1, std::get<1>(tuple));
+ const TensorInfo bia_info(bia_shape, 1, std::get<2>(tuple));
+ TensorInfo output_info(shape, 1, std::get<3>(tuple));
+
+ Status status = ClMatMulNativeMMULKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ }
+ }
+ else
+ {
+ ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ }
+}
+
+TEST_SUITE_END() // Validate
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+TEST_SUITE(Buffer)
+FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_value),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_value),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulKernelBiasFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_value),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { false })),
+ m0_values_nightly_lhs_nt),
+ n0_values_nightly_rhs_nt),
+ k0_value),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTranspose, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { true })),
+ m0_values_nightly_lhs_nt),
+ n0_values_nightly_rhs_t),
+ k0_value),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { true })),
+ framework::dataset::make("TransposeB", { false })),
+ m0_values_nightly_lhs_t),
+ n0_values_nightly_rhs_nt),
+ k0_value),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { true })),
+ framework::dataset::make("TransposeB", { true })),
+ m0_values_nightly_lhs_t),
+ n0_values_nightly_rhs_t),
+ k0_value),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+}
+// Running the High Dimensional test only for FP32 is enough, because it stresses the number of dimensions rather than the data type or M0/N0/K0
+// It is still worth testing each Lhs/Rhs T/NT combination, because each one is a different CL kernel
+FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ framework::dataset::make("M0", { 2 })),
+ framework::dataset::make("N0", { 2 })),
+ framework::dataset::make("K0", { 1 })),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+ }
+}
+TEST_SUITE_END() // Buffer
+
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+TEST_SUITE(Buffer)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { false, true })),
+ framework::dataset::make("TransposeB", { false, true })),
+ m0_values_precommit),
+ n0_values_precommit),
+ k0_value),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { false })),
+ m0_values_nightly_lhs_nt),
+ n0_values_nightly_rhs_nt),
+ k0_value),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTranspose, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { false })),
+ framework::dataset::make("TransposeB", { true })),
+ m0_values_nightly_lhs_nt),
+ n0_values_nightly_rhs_t),
+ k0_value),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { true })),
+ framework::dataset::make("TransposeB", { false })),
+ m0_values_nightly_lhs_t),
+ n0_values_nightly_rhs_nt),
+ k0_value),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+ framework::dataset::make("TransposeA", { true })),
+ framework::dataset::make("TransposeB", { true })),
+ m0_values_nightly_lhs_t),
+ n0_values_nightly_rhs_t),
+ k0_value),
+ framework::dataset::make("ExportRhsToCLImage", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ if(_device_supports_mmul)
+ {
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+ }
+}
+TEST_SUITE_END() // Buffer
+
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // MatMulNativeMMULKernel
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/MaxUnpoolingLayer.cpp b/tests/validation/CL/MaxUnpoolingLayer.cpp
index 6cba8b8bd5..cf4fcdda70 100644
--- a/tests/validation/CL/MaxUnpoolingLayer.cpp
+++ b/tests/validation/CL/MaxUnpoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,20 +51,19 @@ const auto PoolingLayerIndicesDatasetFPSmall = combine(combine(framework::datase
TEST_SUITE(Float)
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(MaxUnpooling, CLMaxUnpoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
+FIXTURE_DATA_TEST_CASE(MaxUnpooling, CLMaxUnpoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
framework::dataset::make("DataType", DataType::F32))),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })
))
{
- printf("validate\n");
// Validate output
validate(CLAccessor(_target), _reference);
}
TEST_SUITE_END() // FP32
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(MaxUnpooling, CLMaxUnpoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
+FIXTURE_DATA_TEST_CASE(MaxUnpooling, CLMaxUnpoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
framework::dataset::make("DataType", DataType::F16))),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })
diff --git a/tests/validation/CL/PReluLayer.cpp b/tests/validation/CL/PReluLayer.cpp
index 043262d891..f3f1c8b1b8 100644
--- a/tests/validation/CL/PReluLayer.cpp
+++ b/tests/validation/CL/PReluLayer.cpp
@@ -56,7 +56,7 @@ const auto PReluLayerQASYMM8Dataset = combine(combine(framework::dataset::make("
const auto PReluLayerQASYMM8SIGNEDDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
framework::dataset::make("DataType",
DataType::QASYMM8_SIGNED));
-const auto PReluLayerS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
+const auto PReluLayerS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
framework::dataset::make("DataType", DataType::S16));
const auto PReluLayerFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("DataType", DataType::F16));
@@ -71,21 +71,18 @@ TEST_SUITE(PReluLayer)
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
}),
framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false})),
+ framework::dataset::make("Expected", { true, false, false})),
input1_info, input2_info, output_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLPReluLayer::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
diff --git a/tests/validation/CL/PadLayer.cpp b/tests/validation/CL/PadLayer.cpp
index 370195b078..ea0cb32785 100644
--- a/tests/validation/CL/PadLayer.cpp
+++ b/tests/validation/CL/PadLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include "arm_compute/graph/Utils.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/functions/CLPadLayer.h"
+#include "src/graph/mutators/MutatorUtils.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
#include "tests/datasets/ShapeDatasets.h"
@@ -110,6 +112,63 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
ARM_COMPUTE_EXPECT(bool(CLPadLayer::validate(&input_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), padding, PixelValue(), mode)) == expected, framework::LogLevel::ERRORS);
}
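+// The helper exercised below is assumed to return true only when every non-zero padding entry lies on the
+// height/width dimensions of the given layout, i.e. when the pad can be fused into a following convolution.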
+DATA_TEST_CASE(CheckFusingWithConvolution, framework::DatasetMode::ALL, zip(zip(
+ framework::dataset::make("DataLayout", { DataLayout::NCHW,
+ DataLayout::NCHW,
+ DataLayout::NCHW,
+ DataLayout::NCHW,
+ DataLayout::NCHW,
+ DataLayout::NCHW,
+ DataLayout::NCHW,
+ DataLayout::NCHW,
+ DataLayout::NHWC,
+ DataLayout::NHWC,
+ DataLayout::NHWC,
+ DataLayout::NHWC,
+ DataLayout::NHWC,
+ DataLayout::NHWC,
+ DataLayout::NHWC,
+ DataLayout::UNKNOWN
+ }),
+ framework::dataset::make("PaddingList", { PaddingList({{0, 0}, {1, 1}, {1, 1}}), // nchw
+ PaddingList({{1, 1}, {1, 1}, {0, 0}, {0, 0}}),
+ PaddingList({{1, 1}, {1, 1}}),
+ PaddingList({}),
+ PaddingList({{0, 0}}),
+ PaddingList({{0, 0}, {0, 0}, {0, 0}, {0, 0}}),
+ PaddingList({{0, 0}, {0, 0}, {0, 0}, {1, 0}}),
+ PaddingList({{0, 1}}),
+ PaddingList({{0, 0}, {1, 1}, {1, 1}}), // nhwc
+ PaddingList({{0, 0}, {0, 0}, {1, 1}, {1, 1}}),
+ PaddingList({{0, 0}, {1, 0}, {1, 1}, {0, 0}}),
+ PaddingList({}),
+ PaddingList({{0, 0}}),
+ PaddingList({{0, 1}}),
+ PaddingList({{0, 0}, {1, 1}}),
+ PaddingList({{0, 0}})
+ })), // unknown
+ framework::dataset::make("Expected", { false, // nchw
+ true,
+ true,
+ true,
+ true,
+ true,
+ false,
+ true,
+ true, // nhwc
+ false,
+ true,
+ true,
+ true,
+ false,
+ true,
+ false // unknown
+ })),
+ data_layout, padding_list, expected)
+{
+ ARM_COMPUTE_EXPECT(expected == arm_compute::graph::is_padding_in_height_or_width(data_layout, padding_list), framework::LogLevel::ERRORS);
+}
+
// clang-format on
// *INDENT-ON*
diff --git a/tests/validation/CL/PixelWiseMultiplication.cpp b/tests/validation/CL/PixelWiseMultiplication.cpp
index 9e0a6243d7..62ff15a37f 100644
--- a/tests/validation/CL/PixelWiseMultiplication.cpp
+++ b/tests/validation/CL/PixelWiseMultiplication.cpp
@@ -36,6 +36,9 @@ namespace test
{
namespace validation
{
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/Mul.cpp from the dynamic fusion interface.
+ * Please check there for any differences in the coverage
+ */
namespace
{
namespace
@@ -50,9 +53,6 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
});
-// Since in-place computation on CL-side hasn't been intended to be implemented, they are not tested.
-// However, this dataset is required for the shared fixture and it would make extension easier when
-// CL-side also starts supporting in-place computation.
const auto InPlaceDataSet = framework::dataset::make("InPlace", { false });
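+// In-place computation is exercised separately by the RunInplace cases added below; the shared dataset keeps InPlace = false.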
} //namespace
// *INDENT-OFF*
@@ -124,7 +124,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationIntegerFixture<int>, f
datasets::SmallShapes(),
framework::dataset::make("DataType1", DataType::S32)),
framework::dataset::make("DataType2", DataType::S32)),
- framework::dataset::make("Scale", {1.f})),
+ framework::dataset::make("Scale", { 1.f })),
datasets::ConvertPolicies()),
framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_UP)),
EmptyActivationFunctionsDataset),
@@ -132,6 +132,18 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationIntegerFixture<int>, f
{
validate(CLAccessor(_target), _reference);
}
+FIXTURE_DATA_TEST_CASE(RunInplace, CLPixelWiseMultiplicationIntegerFixture<int>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(combine(datasets::TinyShapes(),
+ framework::dataset::make("DataType1", DataType::S32)),
+ framework::dataset::make("DataType2", DataType::S32)),
+ framework::dataset::make("Scale", { 1.f })),
+ datasets::ConvertPolicies()),
+ framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_UP)),
+ EmptyActivationFunctionsDataset),
+ framework::dataset::make("InPlace", { true })))
+{
+ validate(CLAccessor(_target), _reference);
+}
TEST_SUITE_END()
TEST_SUITE(F16toF16)
@@ -147,6 +159,19 @@ TEST_SUITE(F32toF32)
TEST_SUITE(Scale255)
PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF32Fixture<float>, PRECOMMIT, SmallShapes(), F32, F32, scale_255, TO_NEAREST_UP, EmptyActivationFunctionsDataset, VALIDATE(float, 1.f))
PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunWithActivation, ToF32Fixture<float>, ALL, TinyShapes(), F32, F32, scale_255, TO_NEAREST_UP, ActivationFunctionsDataset, VALIDATE(float, 1.f))
+FIXTURE_DATA_TEST_CASE(RunInplace, CLPixelWiseMultiplicationToF32Fixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(combine(datasets::TinyShapes(),
+ framework::dataset::make("DataTypeIn1", DataType::F32)),
+ framework::dataset::make("DataTypeIn2", DataType::F32)),
+ framework::dataset::make("Scale", { scale_255 })),
+ datasets::ConvertPolicies()),
+ framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_UP)),
+ EmptyActivationFunctionsDataset),
+ framework::dataset::make("InPlace", { true })))
+{
+ // Validate output
+ VALIDATE(float, 1.f)
+}
TEST_SUITE_END() // Scale255
TEST_SUITE_END() // F32toF32
@@ -200,6 +225,23 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLPixelWiseMultiplicationQuantizedBroa
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
+FIXTURE_DATA_TEST_CASE(RunInplace, CLPixelWiseMultiplicationQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(),
+ framework::dataset::make("DataTypeIn1", DataType::QASYMM8)),
+ framework::dataset::make("DataTypeIn2", DataType::QASYMM8)),
+ framework::dataset::make("DataTypeOut", DataType::QASYMM8)),
+ framework::dataset::make("Scale", { 1.f, 2.f })),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_EVEN)),
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("InPlace", { true })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
diff --git a/tests/validation/CL/Pooling3dLayer.cpp b/tests/validation/CL/Pooling3dLayer.cpp
new file mode 100644
index 0000000000..84d630e6cf
--- /dev/null
+++ b/tests/validation/CL/Pooling3dLayer.cpp
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/TensorShape.h"
+#include "tests/framework/datasets/Datasets.h"
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLPooling3dLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/Pooling3dLayerDataset.h"
+#include "tests/datasets/PoolingTypesDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/Pooling3dLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Input data sets for floating-point data types */
+const auto Pooling3dLayerDatasetFP = combine(combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size3D(2, 3, 2) })),
+ framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 1, 1), Size3D(1, 2, 1), Size3D(2, 2, 1) })),
+ framework::dataset::make("Padding", { Padding3D(0, 1, 0), Padding3D(1, 1, 1) })),
+ framework::dataset::make("ExcludePadding", { true, false }));
+
+const auto Pooling3dLayerDatasetFPSmall = combine(combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size3D(2, 2, 2), Size3D(3, 3, 3) })),
+ framework::dataset::make("Stride", { Size3D(2, 2, 2), Size3D(2, 1, 1) })),
+ framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 1), Padding3D(1, 0, 0) })),
+ framework::dataset::make("ExcludePadding", { true, false }));
+
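+/** Input data sets for asymmetric quantized data types */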
+const auto Pooling3DLayerDatasetQuantized = combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
+ framework::dataset::make("PoolingSize", { Size3D(2, 3, 2) })),
+ framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 1, 1), Size3D(1, 2, 1), Size3D(1, 1, 2), Size3D(2, 2, 1)})),
+ framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 1), Padding3D(1, 0, 0) })),
+ framework::dataset::make("ExcludePadding", { true }));
+
+using ShapeDataset = framework::dataset::ContainerDataset<std::vector<TensorShape>>;
+
+constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */
+constexpr AbsoluteTolerance<float> tolerance_f16(0.1f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); /**< Tolerance value for comparing reference's output against implementation's output for QASYMM8_SIGNED integer datatype*/
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(Pooling3dLayer)
+
+// *INDENT-OFF*
+// clang-format off
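+// Each row pairs an input/output TensorInfo with a Pooling3dLayerInfo; "Expected" states whether CLPooling3dLayer::validate() should accept the configuration.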
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Mismatching data type
+ TensorInfo(TensorShape(2U, 27U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid pad/size combination
+ TensorInfo(TensorShape(2U, 27U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid pad/size combination
+ TensorInfo(TensorShape(2U, 27U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output shape
+ TensorInfo(TensorShape(5U, 13U, 15U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Global Pooling
+ TensorInfo(TensorShape(13U,13U, 5U, 1U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output Global Pooling
+ TensorInfo(TensorShape(5U, 13U, 13U, 4U, 4U), 1, DataType::F32, DataLayout::NDHWC), // Invalid data type
+ TensorInfo(TensorShape(5U, 13U, 13U, 4U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 13U, 13U, 5U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(1U, 16U, 1U, 3U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 13U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+ }),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(2U, 25U, 11U, 3U, 3U), 1, DataType::F16, DataLayout::NDHWC),
+ TensorInfo(TensorShape(2U, 30U, 11U, 3U, 2U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(2U, 25U, 16U, 3U, 2U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(2U, 27U, 13U, 3U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 1U, 1U, 1U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Global pooling applied
+ TensorInfo(TensorShape(5U, 2U, 2U, 2U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output Global Pooling
+ TensorInfo(TensorShape(5U, 12U, 12U, 3U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 12U, 12U, 3U, 4U), 1, DataType::QASYMM8, DataLayout::NDHWC), // Invalid data type
+ TensorInfo(TensorShape(5U, 1U, 1U, 1U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(1U, 15U, 1U, 2U, 4U), 1, DataType::F32, DataLayout::NDHWC), // Output width larger than input
+ TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 6U, 6U, 2U, 2U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+ })),
+ framework::dataset::make("PoolInfo", { Pooling3dLayerInfo(PoolingType::AVG, 3, Size3D(1, 1, 1), Padding3D(0, 0, 0)),
+ Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1, 1, 1), Padding3D(2, 0, 0)),
+ Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1, 1, 1), Padding3D(0, 0, 0)),
+ Pooling3dLayerInfo(PoolingType::L2, 3, Size3D(1, 1, 1), Padding3D(0, 0, 0)),
+ Pooling3dLayerInfo(PoolingType::AVG),
+ Pooling3dLayerInfo(PoolingType::MAX),
+ Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(), Padding3D(), false),
+ Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1U, 1U, 1U), Padding3D(), false),
+ Pooling3dLayerInfo(PoolingType::AVG),
+ Pooling3dLayerInfo(PoolingType::MAX, 2, Size3D(1, 1, 2), Padding3D(0, 0, 0), false),
+ Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(2U, 2U, 2U), Padding3D(), false),
+ Pooling3dLayerInfo(PoolingType::AVG, 1, Size3D(2U, 2U, 2U), Padding3D(2, 2, 2), true), // Pool size is smaller than the padding size with padding excluded
+ Pooling3dLayerInfo(PoolingType::AVG, 1, Size3D(2U, 2U, 2U), Padding3D(2, 2, 2), false), // Pool size is smaller than the padding size with padding included
+ Pooling3dLayerInfo(PoolingType::AVG, 3, Size3D(2U, 2U, 2U), Padding3D(2,1,2,2,1,2), false, false, DimensionRoundingType::CEIL), // CEIL with asymmetric Padding
+ })),
+ framework::dataset::make("Expected", { false, false, false, false, true, false, false, false, true , false, true, false, false, false})),
+ input_info, output_info, pool_info, expected)
+{
+ ARM_COMPUTE_EXPECT(bool(CLPooling3dLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)) == expected, framework::LogLevel::ERRORS);
+}
+
+
+template <typename T>
+using CLPooling3dLayerFixture = Pooling3dLayerValidationFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>;
+
+template <typename T>
+using CLSpecialPooling3dLayerFixture = SpecialPooling3dLayerValidationFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>;
+
+template <typename T>
+using CLPooling3dLayerGlobalFixture = Pooling3dLayerGlobalValidationFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>;
+
+template <typename T>
+using CLPooling3dLayerQuantizedFixture = Pooling3dLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>;
+
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE(QUANTIZED)
+
+TEST_SUITE(QASYMM8)
+// Small quantized dataset
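+// One output QuantizationInfo differs from the input offset, so the requantization path is exercised as well.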
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small5dShapes(),
+ combine(Pooling3DLayerDatasetQuantized,
+ framework::dataset::make("DataType", DataType::QASYMM8))),
+ framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, 10), QuantizationInfo(1.f / 127.f, 10) })),
+ framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, 5), QuantizationInfo(1.f / 127.f, 10) })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+// Large quantized dataset
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large5dShapes(),
+ combine(Pooling3DLayerDatasetQuantized,
+ framework::dataset::make("DataType", DataType::QASYMM8))),
+ framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, 10), QuantizationInfo(1.f / 127.f, 10) })),
+ framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, 5), QuantizationInfo(1.f / 127.f, 10) })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+
+// Small quantized dataset, signed
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small5dShapes(),
+ combine(Pooling3DLayerDatasetQuantized,
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
+ framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, -10), QuantizationInfo(1.f / 127.f, -10) })),
+ framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, -5), QuantizationInfo(1.f / 127.f, -10) })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8_signed);
+}
+
+// Large quantized dataset, signed
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large5dShapes(),
+ combine(Pooling3DLayerDatasetQuantized,
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
+ framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, -10), QuantizationInfo(1.f / 127.f, -10) })),
+ framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, -5), QuantizationInfo(1.f / 127.f, -10) })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8_signed);
+}
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // QUANTIZED
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+
+FIXTURE_DATA_TEST_CASE(RunSpecial, CLSpecialPooling3dLayerFixture<float>, framework::DatasetMode::ALL, datasets::Pooling3dLayerDatasetSpecial() * framework::dataset::make("DataType", DataType::F32))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small5dShapes(), combine(Pooling3dLayerDatasetFPSmall,
+ framework::dataset::make("DataType", DataType::F32))))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Large5dShapes(), combine(Pooling3dLayerDatasetFP,
+ framework::dataset::make("DataType", DataType::F32))))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE(GlobalPooling)
+// *INDENT-OFF*
+// clang-format off
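+// Global pooling: the pooling size (27, 13, 4) spans the full W/H/D extent of the inputs, so each output spatial dimension collapses to 1.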
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(
+ framework::dataset::make("InputShape", { TensorShape(3U, 27U, 13U, 4U),
+ TensorShape(4U, 27U, 13U, 4U, 2U)
+ }),
+ framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("PoolingSize", { Size3D(27, 13, 4) })),
+ framework::dataset::make("Strides", Size3D(1, 1, 1))),
+ framework::dataset::make("Paddings", Padding3D(0, 0, 0))),
+ framework::dataset::make("ExcludePadding", false)),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallGlobal, CLPooling3dLayerGlobalFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(
+ framework::dataset::make("InputShape", { TensorShape(27U, 13U, 4U, 3U),
+ TensorShape(27U, 13U, 4U, 4U, 2U)
+ }),
+ framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(
+ framework::dataset::make("InputShape", { TensorShape(4U, 79U, 37U, 11U),
+ TensorShape(4U, 79U, 37U, 11U, 2U)
+ }),
+ framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("PoolingSize", { Size3D(79, 37, 11) })),
+ framework::dataset::make("Strides", Size3D(1, 1, 1))),
+ framework::dataset::make("Paddings", Padding3D(0, 0, 0))),
+ framework::dataset::make("ExcludePadding", false)),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE_END() // GlobalPooling
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small5x5Shapes(), combine(Pooling3dLayerDatasetFPSmall,
+ framework::dataset::make("DataType", DataType::F16))))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Large5dShapes(), combine(Pooling3dLayerDatasetFP,
+ framework::dataset::make("DataType", DataType::F16))))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+TEST_SUITE(GlobalPooling)
+// *INDENT-OFF*
+// clang-format off
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerFixture<half>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(
+ framework::dataset::make("InputShape", { TensorShape(3U, 27U, 13U, 4U),
+ TensorShape(4U, 27U, 13U, 4U, 2U)
+ }),
+ framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("PoolingSize", { Size3D(27, 13, 4) })),
+ framework::dataset::make("Strides", Size3D(1, 1, 1))),
+ framework::dataset::make("Paddings", Padding3D(0, 0, 0))),
+ framework::dataset::make("ExcludePadding", false)),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallGlobal, CLPooling3dLayerGlobalFixture<half>, framework::DatasetMode::ALL,
+ combine(combine(
+ framework::dataset::make("InputShape", { TensorShape(27U, 13U, 4U, 3U),
+ TensorShape(27U, 13U, 4U, 4U, 2U)
+ }),
+ framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(
+ framework::dataset::make("InputShape", { TensorShape(4U, 79U, 37U, 11U),
+ TensorShape(4U, 79U, 37U, 11U, 2U)
+ }),
+ framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("PoolingSize", { Size3D(79, 37, 11) })),
+ framework::dataset::make("Strides", Size3D(1, 1, 1))),
+ framework::dataset::make("Paddings", Padding3D(0, 0, 0))),
+ framework::dataset::make("ExcludePadding", false)),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE_END() // GlobalPooling
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // Pooling3dLayer
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index 63dec3910f..9fe28c7acf 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -153,7 +153,7 @@ FIXTURE_DATA_TEST_CASE(RunSpecial, CLSpecialPoolingLayerFixture<float>, framewor
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetFPSmall,
framework::dataset::make("DataType",
DataType::F32))),
pool_data_layout_dataset))
@@ -161,7 +161,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::Datase
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(),
combine(combine(combine(combine(datasets::PoolingTypes(),
framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
framework::dataset::make("PadStride", { PadStrideInfo(2, 1, 0, 0) })),
@@ -181,10 +181,11 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<float>, framework::Datase
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPIndicesSmall,
- framework::dataset::make("DataType",
- DataType::F32))),
- pool_data_layout_dataset))
+FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(),
+ combine(PoolingLayerDatasetFPIndicesSmall,
+ framework::dataset::make("DataType",
+ DataType::F32))),
+ pool_data_layout_dataset),framework::dataset::make("UseKernelIndices", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -232,8 +233,9 @@ TEST_SUITE_END() // GlobalPooling
TEST_SUITE_END() // FP32
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMixedPrecesionPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall,
- framework::dataset::make("DataType", DataType::F16))),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMixedPrecesionPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(),
+ combine(PoolingLayerDatasetFPSmall,
+ framework::dataset::make("DataType", DataType::F16))),
pool_data_layout_dataset),
pool_fp_mixed_precision_dataset))
{
@@ -248,10 +250,11 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLMixedPrecesionPoolingLayerFixture<half>, fram
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPIndicesSmall,
- framework::dataset::make("DataType",
- DataType::F16))),
- pool_data_layout_dataset))
+FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(),
+ combine(PoolingLayerDatasetFPIndicesSmall,
+ framework::dataset::make("DataType",
+ DataType::F16))),
+ pool_data_layout_dataset), framework::dataset::make("UseKernelIndices", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -307,7 +310,7 @@ template <typename T>
using CLPoolingLayerQuantizedMixedDataLayoutFixture = PoolingLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLPoolingLayer, T, true>;
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
combine(PoolingLayerDatasetQASYMM8Small,
framework::dataset::make("DataType", DataType::QASYMM8))),
pool_data_layout_dataset),
@@ -317,7 +320,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framew
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })),
@@ -333,7 +336,7 @@ FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerQuantizedMixedDataLayou
TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
combine(PoolingLayerDatasetQASYMM8Small,
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
pool_data_layout_dataset),
@@ -343,7 +346,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<int8_t>, framewo
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8_s);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })),
diff --git a/tests/validation/CL/ReduceMean.cpp b/tests/validation/CL/ReduceMean.cpp
index 947f84af49..8a8fa4aef0 100644
--- a/tests/validation/CL/ReduceMean.cpp
+++ b/tests/validation/CL/ReduceMean.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,7 @@ constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value
constexpr AbsoluteTolerance<float> tolerance_f16(0.03f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric quantized type */
-const auto axis_keep = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1, 0), Coordinates(1, 2), Coordinates(0, 2), Coordinates(1, 3), Coordinates(0, 1, 2, 3) }),
+const auto axis_keep = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1, 0), Coordinates(1, 2), Coordinates(0, 2), Coordinates(1, 3), Coordinates(2, 3), Coordinates(0, 1, 2, 3) }),
framework::dataset::make("KeepDims", { true }));
const auto axis_drop = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1), Coordinates(3), Coordinates(1, 2), Coordinates(2, 1) }), framework::dataset::make("KeepDims", { false }));
} // namespace
diff --git a/tests/validation/CL/Remap.cpp b/tests/validation/CL/Remap.cpp
deleted file mode 100644
index bbb3cecea9..0000000000
--- a/tests/validation/CL/Remap.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLRemap.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/RemapFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr AbsoluteTolerance<uint8_t> tolerance_value(1);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Remap)
-template <typename T>
-using CLRemapFixture = RemapValidationFixture<CLTensor, CLAccessor, CLRemap, T>;
-template <typename T>
-using CLRemapLayoutFixture = RemapValidationMixedLayoutFixture<CLTensor, CLAccessor, CLRemap, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLRemapLayoutFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
- framework::dataset::make("DataType", DataType::U8)),
- framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLRemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
- framework::dataset::make("DataType", DataType::U8)),
- framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Reverse.cpp b/tests/validation/CL/Reverse.cpp
index 11df0e7803..82effc2136 100644
--- a/tests/validation/CL/Reverse.cpp
+++ b/tests/validation/CL/Reverse.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,9 +41,10 @@ namespace test
{
namespace validation
{
+using framework::dataset::make;
namespace
{
-auto run_small_dataset = combine(datasets::SmallShapes(), datasets::Tiny1DShapes());
+auto run_small_dataset = combine(datasets::Small3DShapes(), datasets::Tiny1DShapes());
auto run_large_dataset = combine(datasets::LargeShapes(), datasets::Tiny1DShapes());
} // namespace
@@ -53,33 +54,34 @@ TEST_SUITE(Reverse)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Invalid axis datatype
+ make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Invalid axis datatype
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid axis shape
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid axis length (> 4)
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Mismatching shapes
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(2U), 1, DataType::U8),
}),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8),
+ make("OutputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(2U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(2U), 1, DataType::U8),
})),
- framework::dataset::make("AxisInfo",{ TensorInfo(TensorShape(3U), 1, DataType::U8),
+ make("AxisInfo",{ TensorInfo(TensorShape(3U), 1, DataType::U8),
TensorInfo(TensorShape(2U, 10U), 1, DataType::U32),
TensorInfo(TensorShape(8U), 1, DataType::U32),
TensorInfo(TensorShape(2U), 1, DataType::U32),
TensorInfo(TensorShape(2U), 1, DataType::U32),
TensorInfo(TensorShape(2U), 1, DataType::U32),
})),
- framework::dataset::make("Expected", { false, false, false, false, true, true})),
+ make("Expected", { false, false, false, false, true, true})),
src_info, dst_info, axis_info, expected)
{
Status s = CLReverse::validate(&src_info.clone()->set_is_resizable(false),
&dst_info.clone()->set_is_resizable(false),
- &axis_info.clone()->set_is_resizable(false));
+ &axis_info.clone()->set_is_resizable(false),
+ false);
ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS);
}
// clang-format on
@@ -93,7 +95,11 @@ TEST_SUITE(F16)
FIXTURE_DATA_TEST_CASE(RunSmall,
CLReverseFixture<half>,
framework::DatasetMode::PRECOMMIT,
- combine(run_small_dataset, framework::dataset::make("DataType", DataType::F16)))
+ combine(
+ run_small_dataset,
+ make("DataType", DataType::F16),
+ make("use_negative_axis", { true, false }),
+ make("use_inverted_axis", { true, false })))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -102,7 +108,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
FIXTURE_DATA_TEST_CASE(RunLarge,
CLReverseFixture<half>,
framework::DatasetMode::NIGHTLY,
- combine(run_large_dataset, framework::dataset::make("DataType", DataType::F16)))
+ combine(
+ run_large_dataset,
+ make("DataType", DataType::F16),
+ make("use_negative_axis", { true, false }),
+ make("use_inverted_axis", { true, false })))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -113,7 +123,11 @@ TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall,
CLReverseFixture<float>,
framework::DatasetMode::PRECOMMIT,
- combine(run_small_dataset, framework::dataset::make("DataType", DataType::F32)))
+ combine(
+ run_small_dataset,
+ make("DataType", DataType::F32),
+ make("use_negative_axis", { true, false }),
+ make("use_inverted_axis", { true, false })))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -122,7 +136,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
FIXTURE_DATA_TEST_CASE(RunLarge,
CLReverseFixture<float>,
framework::DatasetMode::NIGHTLY,
- combine(run_large_dataset, framework::dataset::make("DataType", DataType::F32)))
+ combine(
+ run_large_dataset,
+ make("DataType", DataType::F32),
+ make("use_negative_axis", { true, false }),
+ make("use_inverted_axis", { true, false })))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -135,7 +153,11 @@ TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall,
CLReverseFixture<uint8_t>,
framework::DatasetMode::PRECOMMIT,
- combine(run_small_dataset, framework::dataset::make("DataType", DataType::QASYMM8)))
+ combine(
+ run_small_dataset,
+ make("DataType", DataType::QASYMM8),
+ make("use_negative_axis", { true, false }),
+ make("use_inverted_axis", { true, false })))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -144,7 +166,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
FIXTURE_DATA_TEST_CASE(RunLarge,
CLReverseFixture<uint8_t>,
framework::DatasetMode::NIGHTLY,
- combine(run_large_dataset, framework::dataset::make("DataType", DataType::QASYMM8)))
+ combine(
+ run_large_dataset,
+ make("DataType", DataType::QASYMM8),
+ make("use_negative_axis", { true, false }),
+ make("use_inverted_axis", { true, false })))
{
// Validate output
validate(CLAccessor(_target), _reference);
diff --git a/tests/validation/CL/RsqrtLayer.cpp b/tests/validation/CL/RsqrtLayer.cpp
index 936d853d34..2353bda8d3 100644
--- a/tests/validation/CL/RsqrtLayer.cpp
+++ b/tests/validation/CL/RsqrtLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,8 +42,11 @@ namespace validation
{
namespace
{
-RelativeTolerance<float> tolerance_fp32(0.000001f);
-RelativeTolerance<float> tolerance_fp16(0.001f);
+RelativeTolerance<float> tolerance_fp32(0.000001f);
+RelativeTolerance<float> tolerance_fp16(0.001f);
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric type */
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_s(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric type */
+
} // namespace
TEST_SUITE(CL)
TEST_SUITE(RsqrtLayer)
@@ -68,6 +71,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
// *INDENT-ON*
template <typename T>
using CLRsqrtLayerFixture = RsqrtValidationFixture<CLTensor, CLAccessor, CLRsqrtLayer, T>;
+template <typename T>
+using CLRsqrtLayerQuantizedFixture = RsqrtQuantizedValidationFixture<CLTensor, CLAccessor, CLRsqrtLayer, T>;
TEST_SUITE(Float)
TEST_SUITE(FP16)
@@ -102,6 +107,30 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLRsqrtLayerFixture<float>, framework::DatasetM
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8_SIGNED)
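+// The source offset of -128 keeps every dequantized input non-negative, as required by rsqrt.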
+FIXTURE_DATA_TEST_CASE(RunSmall, CLRsqrtLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("SrcQInfo", { QuantizationInfo(0.4044, -128) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(0.0027, -128) })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8_s);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE(QASYMM8)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLRsqrtLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::QASYMM8)),
+ framework::dataset::make("SrcQInfo", { QuantizationInfo(0.4044, 0) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(0.0027, 0) })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // RsqrtLayer
TEST_SUITE_END() // CL
} // namespace validation
diff --git a/tests/validation/CL/Scale.cpp b/tests/validation/CL/Scale.cpp
index 2b34f1f353..10a99ae34f 100644
--- a/tests/validation/CL/Scale.cpp
+++ b/tests/validation/CL/Scale.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -82,6 +82,7 @@ constexpr AbsoluteTolerance<int16_t> tolerance_s16(1);
constexpr float tolerance_f32_absolute(0.001f);
RelativeTolerance<float> tolerance_f32(0.05);
+constexpr float abs_tolerance_f16(0.1f);
RelativeTolerance<half> tolerance_f16(half(0.1));
constexpr float tolerance_num_f32(0.01f);
@@ -186,16 +187,6 @@ TEST_CASE(AlignedCornerNotSupported, framework::DatasetMode::ALL)
ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS);
}
-TEST_CASE(WindowShrink, framework::DatasetMode::ALL)
-{
- const auto input = TensorInfo{ TensorShape(37U, 37U, 2U), 1, DataType::F32 };
- const auto output = TensorInfo{ TensorShape(39U, 55U, 2U), 1, DataType::F32 };
- Status result{};
-
- result = CLScale::validate(&input.clone()->set_is_resizable(false), &output.clone()->set_is_resizable(false), ScaleKernelInfo{ default_interpolation_policy, default_border_mode });
- ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS);
-}
-
TEST_CASE(IncorrectScaleFactor, framework::DatasetMode::ALL)
{
const auto input = TensorInfo{ TensorShape(28U, 33U, 2U), 1, DataType::F32 };
@@ -272,7 +263,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLScaleFixture<half>, framework::DatasetMode::ALL, A
const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
// Validate output
- validate(CLAccessor(_target), _reference, valid_region, tolerance_f16);
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
}
FIXTURE_DATA_TEST_CASE(RunAlignCorners, CLScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f16_shape, ScaleAlignCornersSamplingPolicySet))
{
@@ -281,7 +272,7 @@ FIXTURE_DATA_TEST_CASE(RunAlignCorners, CLScaleFixture<half>, framework::Dataset
const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
// Validate output
- validate(CLAccessor(_target), _reference, valid_region, tolerance_f16);
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
}
const auto f16_nightly_shape = combine((SCALE_NIGHTLY_SHAPE_DATASET(num_elements_per_vector<half>())), framework::dataset::make("DataType", DataType::F16));
FIXTURE_DATA_TEST_CASE(RunNightly, CLScaleFixture<half>, framework::DatasetMode::NIGHTLY, ASSEMBLE_DATASET(f16_nightly_shape, ScaleSamplingPolicySet))
@@ -291,7 +282,7 @@ FIXTURE_DATA_TEST_CASE(RunNightly, CLScaleFixture<half>, framework::DatasetMode:
const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
// Validate output
- validate(CLAccessor(_target), _reference, valid_region, tolerance_f16);
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
}
FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, CLScaleFixture<half>, framework::DatasetMode::NIGHTLY, ASSEMBLE_DATASET(f16_nightly_shape, ScaleAlignCornersSamplingPolicySet))
{
@@ -300,7 +291,7 @@ FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, CLScaleFixture<half>, framework::
const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
// Validate output
- validate(CLAccessor(_target), _reference, valid_region, tolerance_f16);
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
}
TEST_SUITE_END() // FP16
TEST_SUITE_END() // Float
diff --git a/tests/validation/CL/ScatterLayer.cpp b/tests/validation/CL/ScatterLayer.cpp
new file mode 100644
index 0000000000..b1531eb64a
--- /dev/null
+++ b/tests/validation/CL/ScatterLayer.cpp
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLScatter.h"
+#include "tests/validation/fixtures/ScatterLayerFixture.h"
+#include "tests/datasets/ScatterDataset.h"
+#include "tests/CL/CLAccessor.h"
+#include "arm_compute/function_info/ScatterInfo.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */
+RelativeTolerance<float> tolerance_f16(0.02f); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */
+RelativeTolerance<int32_t> tolerance_int(0); /**< Tolerance value for comparing reference's output against implementation's output for integer data types */
+} // namespace
+
+template <typename T>
+using CLScatterLayerFixture = ScatterValidationFixture<CLTensor, CLAccessor, CLScatter, T>;
+
+using framework::dataset::make;
+
+TEST_SUITE(CL)
+TEST_SUITE(Scatter)
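+// Each row pairs input/updates/indices/output TensorInfos with a ScatterInfo; "Expected" states whether CLScatter::validate() should succeed.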
+DATA_TEST_CASE(Validate, framework::DatasetMode::PRECOMMIT, zip(
+ make("InputInfo", { TensorInfo(TensorShape(9U), 1, DataType::F32), // Mismatching data types
+ TensorInfo(TensorShape(15U), 1, DataType::F32), // Valid
+ TensorInfo(TensorShape(15U), 1, DataType::U8), // Valid
+ TensorInfo(TensorShape(8U), 1, DataType::F32),
+ TensorInfo(TensorShape(217U), 1, DataType::F32), // Mismatch input/output dims.
+ TensorInfo(TensorShape(217U), 1, DataType::F32), // Updates dim higher than Input/Output dims.
+ TensorInfo(TensorShape(12U), 1, DataType::F32), // Indices wrong datatype.
+ TensorInfo(TensorShape(9U, 3U, 4U), 1, DataType::F32), // Number of updates != number of indices
+ TensorInfo(TensorShape(17U, 3U, 3U, 2U), 1, DataType::F32), // index_len != (dst_dims - upt_dims + 1)
+ TensorInfo(TensorShape(17U, 3U, 3U, 2U, 2U, 2U), 1, DataType::F32), // index_len > 5
+ }),
+ make("UpdatesInfo",{TensorInfo(TensorShape(3U), 1, DataType::F16),
+ TensorInfo(TensorShape(15U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U), 1, DataType::U8),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(217U), 1, DataType::F32),
+ TensorInfo(TensorShape(217U, 3U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(9U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(17U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(1U), 1, DataType::F32),
+ }),
+ make("IndicesInfo",{TensorInfo(TensorShape(1U, 3U), 1, DataType::S32),
+ TensorInfo(TensorShape(1U, 15U), 1, DataType::S32),
+ TensorInfo(TensorShape(1U, 15U), 1, DataType::S32),
+ TensorInfo(TensorShape(1U, 2U), 1, DataType::S32),
+ TensorInfo(TensorShape(1U, 271U), 1, DataType::S32),
+ TensorInfo(TensorShape(1U, 271U), 1, DataType::S32),
+ TensorInfo(TensorShape(1U, 2U), 1 , DataType::F32),
+ TensorInfo(TensorShape(1U, 4U), 1, DataType::S32),
+ TensorInfo(TensorShape(3U, 2U), 1, DataType::S32),
+ TensorInfo(TensorShape(6U, 2U), 1, DataType::S32),
+ }),
+ make("OutputInfo",{TensorInfo(TensorShape(9U), 1, DataType::F16),
+ TensorInfo(TensorShape(15U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U), 1, DataType::U8),
+ TensorInfo(TensorShape(8U), 1, DataType::F32),
+ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+ TensorInfo(TensorShape(271U), 1, DataType::F32),
+ TensorInfo(TensorShape(12U), 1, DataType::F32),
+ TensorInfo(TensorShape(9U, 3U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(17U, 3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(17U, 3U, 3U, 2U, 2U, 2U), 1, DataType::F32),
+ }),
+ make("ScatterInfo",{ ScatterInfo(ScatterFunction::Add, false),
+ ScatterInfo(ScatterFunction::Max, false),
+ ScatterInfo(ScatterFunction::Max, false),
+ ScatterInfo(ScatterFunction::Min, false),
+ ScatterInfo(ScatterFunction::Add, false),
+ ScatterInfo(ScatterFunction::Update, false),
+ ScatterInfo(ScatterFunction::Sub, false),
+ ScatterInfo(ScatterFunction::Sub, false),
+ ScatterInfo(ScatterFunction::Update, false),
+ ScatterInfo(ScatterFunction::Update, false),
+ }),
+ make("Expected", { false, true, true, true, false, false, false, false, false, false })),
+ input_info, updates_info, indices_info, output_info, scatter_info, expected)
+{
+ const Status status = CLScatter::validate(&input_info, &updates_info, &indices_info, &output_info, scatter_info);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+
+const auto allScatterFunctions = make("ScatterFunction",
+ {ScatterFunction::Update, ScatterFunction::Add, ScatterFunction::Sub, ScatterFunction::Min, ScatterFunction::Max });
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::Small1DScatterDataset(),
+ make("DataType", {DataType::F32}),
+ allScatterFunctions,
+ make("ZeroInit", {false}),
+ make("Inplace", {false}),
+ make("Padding", {true})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+// With ZeroInit set, src should be passed as nullptr.
+FIXTURE_DATA_TEST_CASE(RunSmallZeroInit, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::Small1DScatterDataset(),
+ make("DataType", {DataType::F32}),
+ make("ScatterFunction", {ScatterFunction::Add}),
+ make("ZeroInit", {true}),
+ make("Inplace", {false}),
+ make("Padding", {true})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+// Updates/src/dst have the same number of dimensions.
+FIXTURE_DATA_TEST_CASE(RunSmallMultiDim, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallScatterMultiDimDataset(),
+ make("DataType", {DataType::F32}),
+ allScatterFunctions,
+ make("ZeroInit", {false}),
+ make("Inplace", {false}),
+ make("Padding", {true})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+// m+1-D to m+n-D cases
+FIXTURE_DATA_TEST_CASE(RunSmallMultiIndices, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallScatterMultiIndicesDataset(),
+ make("DataType", {DataType::F32}),
+ make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add }),
+ make("ZeroInit", {false}),
+ make("Inplace", {false, true}),
+ make("Padding", {true})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+// m+k, k-1-D m+n-D case
+FIXTURE_DATA_TEST_CASE(RunSmallBatchedMultiIndices, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallScatterBatchedDataset(),
+ make("DataType", {DataType::F32}),
+ make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add}),
+ make("ZeroInit", {false}),
+ make("Inplace", {false}),
+ make("Padding", {true})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+// Scalar update case (SmallScatterScalarDataset)
+FIXTURE_DATA_TEST_CASE(RunSmallScatterScalar, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallScatterScalarDataset(),
+ make("DataType", {DataType::F32}),
+ make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add}),
+ make("ZeroInit", {false}),
+ make("Inplace", {false}),
+ make("Padding", {false}))) // NOTE: Padding not supported in this datset
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE_END() // FP32
+
+
+// NOTE: Padding is disabled for the SmallScatterMixedDataset due to certain shapes not supporting padding.
+// Padding is well covered by the F32 data type test cases.
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallScatterMixedDataset(),
+ make("DataType", {DataType::F16}),
+ allScatterFunctions,
+ make("ZeroInit", {false}),
+ make("Inplace", {false}),
+ make("Padding", {false})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+
+TEST_SUITE(Integer)
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int32_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallScatterMixedDataset(),
+ make("DataType", {DataType::S32}),
+ allScatterFunctions,
+ make("ZeroInit", {false}),
+ make("Inplace", {false}),
+ make("Padding", {false})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_int);
+}
+TEST_SUITE_END() // S32
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int16_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallScatterMixedDataset(),
+ make("DataType", {DataType::S16}),
+ allScatterFunctions,
+ make("ZeroInit", {false}),
+ make("Inplace", {false}),
+ make("Padding", {false})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_int);
+}
+TEST_SUITE_END() // S16
+
+TEST_SUITE(S8)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallScatterMixedDataset(),
+ make("DataType", {DataType::S8}),
+ allScatterFunctions,
+ make("ZeroInit", {false}),
+ make("Inplace", {false}),
+ make("Padding", {false})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_int);
+}
+TEST_SUITE_END() // S8
+
+TEST_SUITE(U32)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint32_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallScatterMixedDataset(),
+ make("DataType", {DataType::U32}),
+ allScatterFunctions,
+ make("ZeroInit", {false}),
+ make("Inplace", {false}),
+ make("Padding", {false})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_int);
+}
+TEST_SUITE_END() // U32
+
+TEST_SUITE(U16)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint16_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallScatterMixedDataset(),
+ make("DataType", {DataType::U16}),
+ allScatterFunctions,
+ make("ZeroInit", {false}),
+ make("Inplace", {false}),
+ make("Padding", {false})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_int);
+}
+TEST_SUITE_END() // U16
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallScatterMixedDataset(),
+ make("DataType", {DataType::U8}),
+ allScatterFunctions,
+ make("ZeroInit", {false}),
+ make("Inplace", {false}),
+ make("Padding", {false})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_int);
+}
+TEST_SUITE_END() // U8
+TEST_SUITE_END() // Integer
+
+TEST_SUITE_END() // Scatter
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/Tile.cpp b/tests/validation/CL/Tile.cpp
index a06c05744f..f243780c00 100644
--- a/tests/validation/CL/Tile.cpp
+++ b/tests/validation/CL/Tile.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,6 +42,7 @@ namespace validation
namespace
{
const auto MultiplesDataset = framework::dataset::make("Multiples", { Multiples{ 3 },
+ Multiples{ 7 },
Multiples{ 2, 2 },
Multiples{ 1, 1, 3, 4 },
Multiples{ 2, 1, 2, 2 },
diff --git a/tests/validation/CL/Transpose.cpp b/tests/validation/CL/Transpose.cpp
index 943534058b..6cf5fe8537 100644
--- a/tests/validation/CL/Transpose.cpp
+++ b/tests/validation/CL/Transpose.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,12 +50,14 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
framework::dataset::make("InputInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::U16), // Invalid shape
TensorInfo(TensorShape(20U, 13U), 1, DataType::U8), // Wrong data type
TensorInfo(TensorShape(20U, 16U), 1, DataType::U32), // Valid
+ TensorInfo(TensorShape(20U, 16U, 3U, 3U), 1, DataType::U16), // Transpose only first two dimensions
}),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(13U, 20U), 1, DataType::U32),
TensorInfo(TensorShape(31U, 20U), 1, DataType::U16),
TensorInfo(TensorShape(16U, 20U), 1, DataType::U32),
+ TensorInfo(TensorShape(16U, 20U, 3U, 3U), 1, DataType::U16),
})),
- framework::dataset::make("Expected", { false, false, true })),
+ framework::dataset::make("Expected", { false, false, true, true })),
a_info, output_info, expected)
{
// Lock tensors
@@ -80,6 +82,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLTransposeFixture<uint8_t>, framework::Dataset
// Validate output
validate(CLAccessor(_target), _reference);
}
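+// Higher-dimensional inputs: only the first two dimensions are transposed, the remaining ones behave as batch dimensions (see the Validate case above).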
+FIXTURE_DATA_TEST_CASE(RunLargeHighDimensional,
+ CLTransposeFixture<uint8_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(concat(concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
+ datasets::Large5dShapes()),
+ framework::dataset::make("DataType", DataType::U8)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
TEST_SUITE_END() // U8
TEST_SUITE(U16)
@@ -106,6 +118,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLTransposeFixture<uint32_t>, framework::Datase
// Validate output
validate(CLAccessor(_target), _reference);
}
+FIXTURE_DATA_TEST_CASE(RunSmallHighDimensional,
+ CLTransposeFixture<uint32_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
+ framework::dataset::make("DataType", DataType::U32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
FIXTURE_DATA_TEST_CASE(RunLarge, CLTransposeFixture<uint32_t>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large1DShapes(), datasets::Large2DShapes()),
framework::dataset::make("DataType", DataType::U32)))
{
diff --git a/tests/validation/CL/UNIT/DynamicTensor.cpp b/tests/validation/CL/UNIT/DynamicTensor.cpp
index ad2d4892ba..ac433721d8 100644
--- a/tests/validation/CL/UNIT/DynamicTensor.cpp
+++ b/tests/validation/CL/UNIT/DynamicTensor.cpp
@@ -29,10 +29,8 @@
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLIm2ColKernel.h"
#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "src/core/CL/kernels/CLReductionOperationKernel.h"
-#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "tests/AssetsLibrary.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
diff --git a/tests/validation/CL/UNIT/Multithreaded.cpp b/tests/validation/CL/UNIT/Multithreaded.cpp
new file mode 100644
index 0000000000..5c75df709d
--- /dev/null
+++ b/tests/validation/CL/UNIT/Multithreaded.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/RuntimeContext.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/ParametersLibrary.h"
+#include "tests/validation/Validation.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
+#include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/PixelWiseMultiplication.h"
+#include <thread>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(UNIT)
+TEST_SUITE(RuntimeContext)
+// This test tries scheduling work concurrently from multiple independent threads
+TEST_CASE(MultipleThreadedScheduller, framework::DatasetMode::ALL)
+{
+ constexpr auto num_threads(16u);
+ std::array<CLActivationLayer, num_threads> func{};
+ std::array<CLPixelWiseMultiplication, num_threads> pmul{};
+ std::array<CLTensor, num_threads> s0{};
+ std::array<CLTensor, num_threads> s1{};
+
+ std::array<CLTensor, num_threads> st{};
+ std::array<CLTensor, num_threads> dt{};
+
+ const TensorShape tensor_shape(128u, 4u, 5u);
+ const ActivationLayerInfo ainfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.5f, 1.f);
+ std::array<std::thread, num_threads> threads;
+ auto ctx = parameters->get_ctx<CLTensor>();
+
+ for(auto i = 0u; i < num_threads; ++i)
+ {
+ s0[i] = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+ s1[i] = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+ st[i] = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+ dt[i] = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+ func[i] = CLActivationLayer(ctx);
+ pmul[i] = CLPixelWiseMultiplication();
+ threads[i] =
+ std::thread([&,i]
+ {
+ auto &s = st[i];
+ auto &t = dt[i];
+ auto &p0 = s0[i];
+ auto &p1 = s1[i];
+ pmul[i].configure(&p0, &p1, &s, 1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_NEAREST_UP);
+ func[i].configure(&s, &t, ainfo);
+ s.allocator()->allocate();
+ t.allocator()->allocate();
+ p0.allocator()->allocate();
+ p1.allocator()->allocate();
+ library->fill_tensor_uniform(CLAccessor(p0), 0, -1.f, 1.f);
+ library->fill_tensor_uniform(CLAccessor(p1), 0, -1.f, 1.f);
+ pmul[i].run();
+ func[i].run();
+ });
+ }
+
+ for(auto &t : threads)
+ {
+ t.join();
+ }
+
+ SimpleTensor<float> rs{ tensor_shape, DataType::F32, 1 };
+ SimpleTensor<float> ra{ tensor_shape, DataType::F32, 1 };
+ SimpleTensor<float> rb{ tensor_shape, DataType::F32, 1 };
+ library->fill_tensor_uniform(ra, 0, -1.f, 1.f);
+ library->fill_tensor_uniform(rb, 0, -1.f, 1.f);
+ const auto mul = reference::pixel_wise_multiplication<float, float, float>(ra, rb, 1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_NEAREST_UP, DataType::F32);
+ const auto golden = reference::activation_layer<float>(mul, ainfo);
+ for(auto &d : dt)
+ {
+ validate(CLAccessor(d), golden);
+ }
+}
+
+TEST_SUITE_END() // RuntimeContext
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
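
A note on the design of the test above: each worker thread owns its own function objects (CLPixelWiseMultiplication, CLActivationLayer) and its own tensors, so the threads only share the underlying CL scheduler and queue. The host reference is computed once, using the same fill seed as the threads, and every thread's output tensor is validated against that single golden result; concurrent use of a single function object from several threads is not exercised here.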
diff --git a/tests/validation/CL/WeightsReshape.cpp b/tests/validation/CL/WeightsReshape.cpp
index d04c10cee2..4345c4b08a 100644
--- a/tests/validation/CL/WeightsReshape.cpp
+++ b/tests/validation/CL/WeightsReshape.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
-#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "src/gpu/cl/kernels/ClWeightsReshapeKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/datasets/ShapeDatasets.h"
@@ -41,7 +41,7 @@ namespace validation
TEST_SUITE(CL)
TEST_SUITE(WeightsReshape)
-using CLWeightsReshape = CLSynthetizeFunction<CLWeightsReshapeKernel>;
+using ClWeightsReshape = ClSynthetizeOperatorWithBorder<opencl::kernels::ClWeightsReshapeKernel>;
/** Validate tests
*
@@ -87,15 +87,15 @@ framework::dataset::make("NumGroups", { 1, 1, 1, 2, 1, 2 })),
framework::dataset::make("Expected", { false, false, false, false, false, false })),
input_info, biases_info, output_info, num_groups, expected)
{
- bool status = bool(CLWeightsReshape::validate(&input_info, &biases_info, &output_info, num_groups));
+ bool status = bool(opencl::kernels::ClWeightsReshapeKernel::validate(&input_info, &biases_info, &output_info, num_groups));
ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS);
}
template <typename T>
-using CLWeightsReshapeFixture = WeightsReshapeValidationFixture<CLTensor, CLAccessor, CLWeightsReshape, T>;
+using ClWeightsReshapeFixture = WeightsReshapeOpValidationFixture<CLTensor, CLAccessor, ClWeightsReshape, T>;
TEST_SUITE(Float)
-FIXTURE_DATA_TEST_CASE(FP32, CLWeightsReshapeFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(3U, 3U, 48U, 120U) }),
+FIXTURE_DATA_TEST_CASE(FP32, ClWeightsReshapeFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(3U, 3U, 48U, 120U) }),
framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("HasBias", { true, false })),
framework::dataset::make("NumGroups", { 1, 2 })))
@@ -104,7 +104,7 @@ FIXTURE_DATA_TEST_CASE(FP32, CLWeightsReshapeFixture<float>, framework::DatasetM
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(FP16, CLWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(13U, 13U, 96U, 240U) }),
+FIXTURE_DATA_TEST_CASE(FP16, ClWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(13U, 13U, 96U, 240U) }),
framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("HasBias", { true, false })),
framework::dataset::make("NumGroups", { 3, 4 })))
@@ -113,7 +113,7 @@ FIXTURE_DATA_TEST_CASE(FP16, CLWeightsReshapeFixture<half>, framework::DatasetMo
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(BFloat16, CLWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(9U, 9U, 96U, 240U) }),
+FIXTURE_DATA_TEST_CASE(BFloat16, ClWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(9U, 9U, 96U, 240U) }),
framework::dataset::make("DataType", DataType::BFLOAT16)),
framework::dataset::make("HasBias", { false })),
framework::dataset::make("NumGroups", { 3, 4 })))
@@ -125,7 +125,7 @@ FIXTURE_DATA_TEST_CASE(BFloat16, CLWeightsReshapeFixture<half>, framework::Datas
TEST_SUITE_END()
TEST_SUITE(Quantized)
-FIXTURE_DATA_TEST_CASE(QASYMM8, CLWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(5U, 5U, 48U, 120U) }),
+FIXTURE_DATA_TEST_CASE(QASYMM8, ClWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(5U, 5U, 48U, 120U) }),
framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("HasBias", { false })),
framework::dataset::make("NumGroups", { 1, 2 })))
@@ -134,7 +134,7 @@ FIXTURE_DATA_TEST_CASE(QASYMM8, CLWeightsReshapeFixture<uint8_t>, framework::Dat
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(QASYMM8_SIGNED, CLWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(5U, 5U, 48U, 120U) }),
+FIXTURE_DATA_TEST_CASE(QASYMM8_SIGNED, ClWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(5U, 5U, 48U, 120U) }),
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
framework::dataset::make("HasBias", { false })),
framework::dataset::make("NumGroups", { 1, 2 })))
diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp
index 6ac37d1475..196e7edb8c 100644
--- a/tests/validation/CL/Winograd.cpp
+++ b/tests/validation/CL/Winograd.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,7 @@
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ActivationFunctionsDataset.h"
#include "tests/datasets/LargeConvolutionLayerDataset.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/datasets/SmallConvolutionLayerDataset.h"
@@ -47,6 +48,7 @@ namespace test
{
namespace validation
{
+using framework::dataset::make;
namespace
{
// *INDENT-OFF*
@@ -57,108 +59,232 @@ const AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f));
RelativeTolerance<half_float::half> rel_tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for FP16 data types */
constexpr float tolerance_num = 0.05f; /**< Tolerance number */
constexpr float abs_tolerance_convolution_layer_f16 = 2.5f; /**< Tolerance number */
-constexpr float tolerance_num_f16 = 0.15f; /**< Tolerance number */
+constexpr float tolerance_num_f16 = 0.15f; /**< Tolerance number */
-//Activation Functions
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+const auto ActivationFunctionsDataset = make("ActivationInfo",
{
- ActivationLayerInfo(),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.8f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU)
});
-const auto ActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo",
+
+const auto ActivationFunctionsSmallDataset = make("ActivationInfo",
{
ActivationLayerInfo(),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU)
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.8f, -0.5f)
});
} // namespace
using namespace arm_compute::misc::shape_calculator;
+/*
+ Testing Strategy of CL Winograd:
+ - For each data layout (NCHW and NHWC) and for each kernel size there is a dedicated
+ OpenCL kernel (except that 1xN and Nx1 use the NxN kernel under the hood).
+ Therefore, the test cases stress each of these configurations.
+ - The Fp32 and Fp16 kernels are identical; only the DATA_TYPE build option changes
+ between the two. Because the same kernel is already stressed thoroughly for both
+ small and large shapes with Fp32 data, the Fp16 kernels are run on a subset of the
+ shapes, since exhaustively re-testing the same kernel gives diminishing
+ returns.
+ - Activations only affect the output stage and are applied on the output tile.
+ Exhaustively testing all activations with all shapes does not add much
+ value but increases the testing time significantly. Therefore, all activations
+ are tested on a subset of the shapes, but for all MxM kernels and data layouts, as
+ these map to different OpenCL kernels (1xM and Mx1 kernels use MxM under the hood).
+*/
TEST_SUITE(CL)
TEST_SUITE(Winograd)
TEST_SUITE(ConvolutionLayer)
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", {
- TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16), // Insufficient padding
- TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Datatype mismatch
- TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Stride y not supported
- TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32), // Padding needed
- TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32) // Kernel size not supported
- }),
- framework::dataset::make("WeightsInfo", {
- TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::F16),
- TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::QASYMM8),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
- TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32),
- TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
- })),
- framework::dataset::make("BiasesInfo", {
- TensorInfo(TensorShape(19U), 1, DataType::F16),
- TensorInfo(TensorShape(19U), 1, DataType::F32),
- TensorInfo(TensorShape(21U), 1, DataType::F32),
- TensorInfo(TensorShape(16U), 1, DataType::F32),
- TensorInfo(TensorShape(16U), 1, DataType::F32)
- })),
- framework::dataset::make("OutputInfo", {
- TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F16),
- TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
- TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
- TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32),
- TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
- })),
- framework::dataset::make("ConvInfo", {
- PadStrideInfo(1, 1, 1, 1),
- PadStrideInfo(1, 1, 1, 1),
- PadStrideInfo(1, 2, 0, 0),
- PadStrideInfo(1, 1, 1, 1),
- PadStrideInfo(1, 1, 1, 0)
- })),
- framework::dataset::make("Expected", { false, false, false, false, false })),
- input_info, weights_info, bias_info, output_info, conv_info, expected)
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+ make("InputInfo", {
+ TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16), // Insufficient padding
+ TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Datatype mismatch
+ TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Stride y not supported
+ TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32), // Padding needed
+ TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32) // Kernel size not supported
+ }),
+ make("WeightsInfo", {
+ TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
+ }),
+ make("BiasesInfo", {
+ TensorInfo(TensorShape(19U), 1, DataType::F16),
+ TensorInfo(TensorShape(19U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32)
+ }),
+ make("OutputInfo", {
+ TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F16),
+ TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
+ }),
+ make("ConvInfo", {
+ PadStrideInfo(1, 1, 1, 1),
+ PadStrideInfo(1, 1, 1, 1),
+ PadStrideInfo(1, 2, 0, 0),
+ PadStrideInfo(1, 1, 1, 1),
+ PadStrideInfo(1, 1, 1, 0)
+ }),
+ make("Expected", { false, false, false, false, false })),
+ input_info, weights_info, bias_info, output_info, conv_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLWinogradConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info)) == expected, framework::LogLevel::ERRORS);
}
+DATA_TEST_CASE(SupportedKernels, framework::DatasetMode::ALL, zip(
+ make("WeightsInfo", {
+ // Shapes are always in NCHW format. When layout is NHWC, the shape is permuted
+
+ // Fp32/16, NCHW
+ // 3x1, 1x3, 3x3 --> all TRUE
+ TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+
+ // 5x1, 1x5, 5x5 --> all TRUE
+ TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+ TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+ // 7x1, 1x7, 7x7
+ // nchw does not support kernels with size 7 --> all FALSE
+ TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+ // unsupported kernel sizes
+ TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+ // Fp32/16, NHWC
+ // 7x1, 1x7, 7x7 --> all TRUE
+ TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+
+ // 3x1, 1x3, 3x3 --> all TRUE
+ TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+
+ // 5x1, 1x5, 5x5 --> all TRUE
+ TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+
+ // unsupported kernel sizes
+ TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+
+ }),
+ make("Expected", {
+ true, true, true, // nchw, 3x3, 1x3, 3x1
+ true, true, true, // nchw, 5x5, 1x5, 5x1
+ false, false, false, // nchw, 7x7, 1x7, 7x1
+ false, false, false, // nchw, random unsupported kernels
+ true, true, true, // nhwc, 7x7, 1x7, 7x1
+ true, true, true, // nhwc, 3x3, 1x3, 3x1
+ true, true, true, // nhwc, 5x5, 1x5, 5x1
+ false, false, false, // nhwc, random unsupported kernels
+ })),
+ weights_info_const, expected)
+{
+ DataType data_type = weights_info_const.data_type();
+ DataLayout data_layout = weights_info_const.data_layout();
+
+ TensorInfo input_info = TensorInfo(TensorShape(17U, 31U, 2U), 1, data_type);
+ TensorInfo bias_info = TensorInfo(TensorShape(8U), 1, data_type);
+ TensorInfo weights_info = weights_info_const;
+
+ if(data_layout == DataLayout::NHWC)
+ {
+ // Convert to NHWC
+ PermutationVector perm = PermutationVector(2U, 0U, 1U);
+
+ TensorShape input_shape = input_info.tensor_shape();
+ TensorShape weights_shape = weights_info.tensor_shape();
+ permute(input_shape, perm);
+ permute(weights_shape, perm);
+
+ input_info.set_tensor_shape(input_shape);
+ weights_info.set_tensor_shape(weights_shape);
+
+ input_info.set_data_layout(data_layout);
+ weights_info.set_data_layout(data_layout);
+ bias_info.set_data_layout(data_layout);
+ }
+
+ PadStrideInfo conv_info(1, 1, 0, 0);
+
+ TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, conv_info);
+ TensorInfo output_info = TensorInfo(output_shape, 1, data_type, data_layout);
+
+ Status status = CLWinogradConvolutionLayer::validate(
+ &input_info,
+ &weights_info,
+ &bias_info,
+ &output_info,
+ conv_info,
+ ActivationLayerInfo(),
+ true /* fast math */);
+
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+
TEST_SUITE(FP32)
using CLWinogradConvolutionLayerFastMathFixture = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float>;
using CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float, float, true, true>;
TEST_SUITE(Conv3x3)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(8U, 8U, 32U)),
- framework::dataset::make("Weight", TensorShape(1U, 3U, 32U, 1U))),
- framework::dataset::make("Bias", TensorShape(1U))),
- framework::dataset::make("Output", TensorShape(8U, 6U, 1U))),
- framework::dataset::make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0))),
- framework::dataset::make("Dilation", Size2D(1U, 1U))),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("Input", TensorShape(8U, 8U, 32U)),
+ make("Weight", TensorShape(3U, 3U, 32U, 4U)),
+ make("Bias", TensorShape(4U)),
+ make("Output", TensorShape(6U, 6U, 4U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
@@ -167,20 +293,20 @@ TEST_SUITE_END() // Conv3x3
TEST_SUITE(Conv3x1)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
@@ -189,20 +315,36 @@ TEST_SUITE_END() // Conv3x1
TEST_SUITE(Conv1x3)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture, framework::DatasetMode::PRECOMMIT,
+ combine(
+ make("Input", TensorShape(8U, 8U, 32U)),
+ make("Weight", TensorShape(1U, 3U, 32U, 1U)),
+ make("Bias", TensorShape(1U)),
+ make("Output", TensorShape(8U, 6U, 1U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
@@ -211,10 +353,10 @@ TEST_SUITE_END() // Conv1x3
TEST_SUITE(Conv5x5)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset ),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -222,11 +364,27 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset ),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("Input", TensorShape(13U, 13U, 32U)),
+ make("Weight", TensorShape(5U, 5U, 32U, 4U)),
+ make("Bias", TensorShape(4U)),
+ make("Output", TensorShape(9U, 9U, 4U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
@@ -235,10 +393,10 @@ TEST_SUITE_END() // Conv5x5
TEST_SUITE(Conv5x1)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -246,10 +404,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -259,10 +417,10 @@ TEST_SUITE_END() // Conv5x1
TEST_SUITE(Conv1x5)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -270,16 +428,63 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
}
TEST_SUITE_END() // Conv1x5
+
+TEST_SUITE(Conv1x7)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NHWC })))
+
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("Input", TensorShape(13U, 13U, 32U)),
+ make("Weight", TensorShape(1U, 7U, 32U, 4U)),
+ make("Bias", TensorShape(4U)),
+ make("Output", TensorShape(13U, 11U, 4U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 2)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+TEST_SUITE_END() // Conv1x7
+
+TEST_SUITE(Conv7x1)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NHWC })))
+
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+TEST_SUITE_END() // Conv7x1
+
+/** @note Although the 7x7 case is implemented in the kernels, the reference
+ * implementation does not support it, so it remains a "test gap".
+ */
+
TEST_SUITE_END() // FP32
@@ -288,20 +493,36 @@ TEST_SUITE(FP16)
using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, half, float>;
TEST_SUITE(Conv3x3)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer3x3DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("Input", TensorShape(8U, 8U, 32U)),
+ make("Weight", TensorShape(3U, 3U, 32U, 6U)),
+ make("Bias", TensorShape(6U)),
+ make("Output", TensorShape(6U, 6U, 6U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
@@ -310,20 +531,20 @@ TEST_SUITE_END() // Conv3x3
TEST_SUITE(Conv3x1)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer3x1DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
@@ -332,20 +553,20 @@ TEST_SUITE_END() // Conv3x1
TEST_SUITE(Conv1x3)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x3DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
@@ -354,10 +575,10 @@ TEST_SUITE_END() // Conv1x3
TEST_SUITE(Conv5x5)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -365,23 +586,39 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer5x5DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
}
+
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("Input", TensorShape(13U, 13U, 32U)),
+ make("Weight", TensorShape(5U, 5U, 32U, 6U)),
+ make("Bias", TensorShape(6U)),
+ make("Output", TensorShape(9U, 9U, 6U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
TEST_SUITE_END() // Conv5x5
TEST_SUITE(Conv5x1)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -389,10 +626,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer5x1DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -402,10 +639,10 @@ TEST_SUITE_END() // Conv5x1
TEST_SUITE(Conv1x5)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -413,10 +650,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x5DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -426,10 +663,10 @@ TEST_SUITE_END() // Conv1x5
TEST_SUITE(Conv1x7)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
@@ -437,16 +674,46 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x7Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x7DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
+
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("Input", TensorShape(13U, 13U, 32U)),
+ make("Weight", TensorShape(1U, 7U, 32U, 6U)),
+ make("Bias", TensorShape(6U)),
+ make("Output", TensorShape(13U, 7U, 6U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
}
TEST_SUITE_END() // Conv1x7
+
+TEST_SUITE(Conv7x1)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NHWC })))
+
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
+}
+TEST_SUITE_END() // Conv7x1
+
TEST_SUITE_END() // FP16
TEST_SUITE_END() // ConvolutionLayer
TEST_SUITE_END() // Winograd
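
For orientation, the function exercised by all of the fixtures above is CLWinogradConvolutionLayer. A minimal usage sketch follows; the shapes, activation and fast-math flag are chosen purely for illustration, and the configure()/run() calls are assumed to mirror the validate() interface shown in the Validate test case above:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // Illustrative shapes only: a 3x3 F32 NCHW convolution with unit stride and no padding,
    // the same kind of configuration the RunActivations fixture above generates.
    CLTensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 32U, 4U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(6U, 6U, 4U), 1, DataType::F32));

    const PadStrideInfo       conv_info(1, 1, 0, 0);
    const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);

    // validate() mirrors what the Validate/SupportedKernels data test cases check.
    CLWinogradConvolutionLayer conv;
    if(bool(CLWinogradConvolutionLayer::validate(src.info(), weights.info(), bias.info(), dst.info(),
                                                 conv_info, act_info, true /* enable_fast_math */)))
    {
        conv.configure(&src, &weights, &bias, &dst, conv_info, act_info, true /* enable_fast_math */);

        src.allocator()->allocate();
        weights.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill src/weights/bias with input data ...
        conv.run();
        CLScheduler::get().sync();
    }
    return 0;
}

The fixtures do essentially this for every dataset entry and then compare the CL output against the reference implementation using the tolerances defined at the top of the file.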
diff --git a/tests/validation/CMakeLists.txt b/tests/validation/CMakeLists.txt
new file mode 100644
index 0000000000..448e96c4f9
--- /dev/null
+++ b/tests/validation/CMakeLists.txt
@@ -0,0 +1,146 @@
+# Copyright (c) 2023 Arm Limited.
+#
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+target_sources(
+ arm_compute_validation
+ PRIVATE UNIT/SafeIntegerOps.cpp
+ UNIT/Version.cpp
+ UNIT/TensorInfo.cpp
+ UNIT/TensorShape.cpp
+ UNIT/Utils.cpp
+ UNIT/SubTensorInfo.cpp
+ UNIT/WindowIterator.cpp
+ UNIT/LifetimeManager.cpp
+ UNIT/GPUTarget.cpp
+ CPP/DetectionPostProcessLayer.cpp
+ CPP/TopKV.cpp
+ CPP/DFT.cpp
+ CPP/Permute.cpp
+ CPP/NonMaximumSuppression.cpp)
+
+if(ENABLE_NEON)
+ target_sources(
+ arm_compute_validation
+ PRIVATE NEON/ElementwiseNegation.cpp
+ NEON/BoundingBoxTransform.cpp
+ NEON/ChannelShuffle.cpp
+ NEON/Logical.cpp
+ NEON/DilatedConvolutionLayer.cpp
+ NEON/PoolingLayer.cpp
+ NEON/BitwiseNot.cpp
+ NEON/FillBorder.cpp
+ NEON/ElementwiseRsqrtLayer.cpp
+ NEON/DepthConcatenateLayer.cpp
+ NEON/ElementwisePower.cpp
+ NEON/Fill.cpp
+ NEON/ROIPoolingLayer.cpp
+ NEON/LSTMLayer.cpp
+ NEON/ArithmeticSubtraction.cpp
+ NEON/GEMMLowp.cpp
+ NEON/Unstack.cpp
+ NEON/Slice.cpp
+ NEON/Pooling3dLayer.cpp
+ NEON/BitwiseOr.cpp
+ NEON/HeightConcatenateLayer.cpp
+ NEON/ReshapeLayer.cpp
+ NEON/SoftmaxLayer.cpp
+ NEON/Gather.cpp
+ NEON/CropResize.cpp
+ NEON/ReductionOperation.cpp
+ NEON/PixelWiseMultiplication.cpp
+ NEON/LogSoftmaxLayer.cpp
+ NEON/DepthConvertLayer.cpp
+ NEON/Flatten.cpp
+ NEON/ElementwiseKernelSelection.cpp
+ NEON/DepthToSpaceLayer.cpp
+ NEON/ElementwiseAbsoluteValue.cpp
+ NEON/PadLayer.cpp
+ NEON/MeanStdDevNormalizationLayer.cpp
+ NEON/GlobalPoolingLayer.cpp
+ NEON/RNNLayer.cpp
+ NEON/DetectionPostProcessLayer.cpp
+ NEON/ElementwiseRound.cpp
+ NEON/BitwiseXor.cpp
+ NEON/GEMM.cpp
+ NEON/FuseBatchNormalization.cpp
+ NEON/BitwiseAnd.cpp
+ NEON/ElementwiseMax.cpp
+ NEON/ReduceMean.cpp
+ NEON/Reverse.cpp
+ NEON/L2NormalizeLayer.cpp
+ NEON/Convolution3D.cpp
+ NEON/ArithmeticAddition.cpp
+ NEON/ActivationLayer.cpp
+ NEON/SpaceToBatchLayer.cpp
+ NEON/ElementwiseLog.cpp
+ NEON/LSTMLayerQuantized.cpp
+ NEON/Im2Col.cpp
+ NEON/DequantizationLayer.cpp
+ NEON/DeconvolutionLayer.cpp
+ NEON/Select.cpp
+ NEON/ElementwiseSin.cpp
+ NEON/PReluLayer.cpp
+ NEON/BatchNormalizationLayer.cpp
+ NEON/ElementwiseMin.cpp
+ NEON/InstanceNormalizationLayer.cpp
+ NEON/ROIAlignLayer.cpp
+ NEON/ElementwiseDivision.cpp
+ NEON/ElementwiseExpLayer.cpp
+ NEON/ArgMinMax.cpp
+ NEON/QLSTMLayerNormalization.cpp
+ NEON/Col2Im.cpp
+ NEON/Split.cpp
+ NEON/Transpose.cpp
+ NEON/GenerateProposalsLayer.cpp
+ NEON/StackLayer.cpp
+ NEON/WidthConcatenateLayer.cpp
+ NEON/NormalizationLayer.cpp
+ NEON/Copy.cpp
+ NEON/ElementwiseSquareDiff.cpp
+ NEON/MaxUnpoolingLayer.cpp
+ NEON/Permute.cpp
+ NEON/Comparisons.cpp
+ NEON/BatchConcatenateLayer.cpp
+ NEON/Tile.cpp
+ NEON/BatchToSpaceLayer.cpp
+ NEON/SpaceToDepthLayer.cpp
+ NEON/DepthwiseConvolutionLayerNative.cpp
+ NEON/QuantizationLayer.cpp
+ NEON/ConvertFullyConnectedWeights.cpp
+ NEON/Floor.cpp
+ NEON/FFT.cpp
+ NEON/Cast.cpp
+ NEON/PriorBoxLayer.cpp
+ NEON/Scale.cpp
+ NEON/ReorgLayer.cpp
+ NEON/Range.cpp
+ NEON/DirectConvolutionLayer.cpp
+ NEON/DepthwiseConvolutionLayer.cpp
+ NEON/FullyConnectedLayer.cpp
+ NEON/ConvolutionLayer.cpp
+ NEON/StridedSlice.cpp
+ NEON/ReorderLayer.cpp
+ NEON/UNIT/DynamicTensor.cpp
+ NEON/UNIT/TensorAllocator.cpp
+ NEON/UNIT/MemoryManager.cpp
+ NEON/UNIT/RuntimeContext.cpp)
+endif()
diff --git a/tests/validation/CPP/DFT.cpp b/tests/validation/CPP/DFT.cpp
index e19e850589..84431399be 100644
--- a/tests/validation/CPP/DFT.cpp
+++ b/tests/validation/CPP/DFT.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -125,7 +125,7 @@ DATA_TEST_CASE(Real, framework::DatasetMode::ALL, shapes_2d_dft,
auto backward = reference::ridft_2d(forward, is_odd);
// Validate with input
- validate(SimpleTensorAccessor<float>(src), backward, RelativeTolerance<float>(0.1f));
+ validate(SimpleTensorAccessor<float>(src), backward, RelativeTolerance<float>(0.1f), 0.f, AbsoluteTolerance<float>(0.001f));
}
DATA_TEST_CASE(Complex, framework::DatasetMode::ALL, shapes_2d_dft,
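
The added AbsoluteTolerance matters because the inverse DFT output contains values near zero, where a purely relative bound becomes arbitrarily strict: for a reference value of 1e-6, an error of 1e-5 fails the 10% relative check while being numerically negligible, yet it passes the 0.001 absolute bound. A generic sketch of how relative and absolute bounds are typically combined (illustration only, not necessarily this framework's exact comparison logic):

#include <cmath>

// Illustration only: accept a value if it is close in relative terms (large magnitudes)
// or in absolute terms (values near zero).
bool within_tolerance(float target, float reference, float rel_tol, float abs_tol)
{
    const float err = std::fabs(target - reference);
    return err <= abs_tol || err <= rel_tol * std::fabs(reference);
}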
diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp
index 0f5d5c5101..560460fd33 100644
--- a/tests/validation/Helpers.cpp
+++ b/tests/validation/Helpers.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,9 +22,12 @@
* SOFTWARE.
*/
#include "tests/validation/Helpers.h"
+#include "tests/framework/Asserts.h"
#include <algorithm>
#include <cmath>
+#include <cstdint>
+#include <tuple>
namespace arm_compute
{
@@ -349,6 +352,225 @@ void add_padding_x(std::initializer_list<ITensor *> tensors, const DataLayout &d
}
}
+QuantizationHint suggest_conv_dst_q_info_and_bias(const QuantizationInfo &in_q_info,
+ const QuantizationInfo &weight_q_info,
+ int32_t height,
+ int32_t width,
+ int32_t channels,
+ DataType data_type,
+ float bias_fraction)
+{
+ /** Quantization Setup of convolution
+ *
+ * Like any other multiply-accumulate, the convolution (2D) operation
+ * multiplies and accumulates the input and weight tensors. The accumulation
+ * runs over three dimensions: the height, width and channel dimensions of
+ * the weight tensor.
+ *
+ * The formula for simple convolution can be written as:
+ * C = sum_h sum_w sum_c(I[h_offset + h, w_offset + w, c] * W[h, w, c])
+ *
+ * Here, h_offset and w_offset are the starting positions in the image. Effects
+ * of paddings are ignored. This accumulation reduces to something like
+ *
+ * C = sum_m(I_index * W_hwc)
+ * where m is height x width x channels.
+ *
+ * Non-unit strides and/or dilations do not change the probabilistic nature of
+ * this sum because we always iterate over the full extent of the weight tensor.
+ *
+ * Paddings may affect this summation, but it's a boundary condition and so is
+ * neglected for brevity.
+ */
+
+ return suggest_mac_dst_q_info_and_bias(in_q_info, weight_q_info, height * width * channels, data_type, bias_fraction);
+}
+
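
In other words, the K handed on to suggest_mac_dst_q_info_and_bias() is simply the total number of multiply-accumulates per output element: for example, a 3x3 kernel over 64 input channels gives K = 3 * 3 * 64 = 576, regardless of stride, dilation or output size.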
+QuantizationHint suggest_matmul_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info,
+ const QuantizationInfo &rhs_q_info,
+ int32_t m, int32_t n, int32_t k, DataType data_type,
+ float bias_fraction)
+{
+ ARM_COMPUTE_UNUSED(m, n);
+
+ /** Quantization Setup of matrix multiplication
+ *
+ * We have a matrix multiplication of the form C = A * B + D
+ * where A is (m X k), B is (k x n) and C is therefore (m x n).
+ * The bias, D is (1 x n).
+ *
+ * If we have some distributional statistics of A, B and D, i.e. mean and variance,
+ * we can estimate the mean and variance of a single value in C matrix and pick
+ * good scale and offset values for the output and have non-saturated tests.
+ *
+ * Each element in the output matrix can be calculated as follows:
+ * C_ij = sum_k(A_ik * B_kj) + D_j
+ *
+ * Note: All possible A_ik, B_kj, D_j random variables are assumed mutually independent.
+ * Note: In quantized operators, bias is an integer. But, its quantization scale is
+ * assumed to be equal to lhs_scale * rhs_scale, and offset equal to 0.
+ * Note: Since the bias is an integer that should be given as input, we need to pick sensible
+ * values when adding it on top of the summation. This is where "bias_fraction" comes
+ * into play. Based on the fraction given, we also return suggested bias range (min/max)
+ * for not saturating the output.
+ *
+ * Because all random variables are mutually independent, every C_ij has the same statistics,
+ * which is why we return a single destination quantization info object and why we can
+ * resort to the more general calculation explained in suggest_mac_dst_q_info_and_bias().
+ *
+ * From a probabilistic perspective, the above calculation reduces to
+ * c = sum_k (a_k * b_k) + d
+ */
+
+ return suggest_mac_dst_q_info_and_bias(lhs_q_info, rhs_q_info, k, data_type, bias_fraction);
+}
+
+QuantizationHint suggest_mac_dst_q_info_and_bias(
+ const QuantizationInfo &a_q_info, const QuantizationInfo &b_q_info, int32_t K, DataType data_type, float bias_fraction, int num_sd)
+{
+ QuantizationInfo c_q_info;
+
+ ARM_COMPUTE_ASSERT(data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED);
+
+ const int32_t t_max = static_cast<int32_t>(data_type == DataType::QASYMM8 ? std::numeric_limits<uint8_t>::max() : std::numeric_limits<int8_t>::max());
+ const int32_t t_min = static_cast<int32_t>(data_type == DataType::QASYMM8 ? std::numeric_limits<uint8_t>::min() : std::numeric_limits<int8_t>::min());
+
+ /** Quantization Setup of multiply-accumulate
+ *
+ * Expression (in float):
+ * C = sum_k ( A_k * B_k ) + D
+ *
+ * Lemma: An affine transformation (i.e. aX + b) to a discrete uniform random variable
+ * creates another discrete uniform random variable.
+ *
+ * Terminology:
+ * E[X]: Mean of the random variable X (sometimes referred to as mu_x)
+ * var(X): Variance of the random variable X (sometimes referred to as sigma^2_x)
+ * std(X): sqrt(var(X)), standard deviation of X
+ *
+ * 1) Calculate the mean:
+ * E[C] = sum_k( E[A_k] * E[B_k] ) + D = K * mean_a * mean_b + mean_d
+ *
+ * Since elements of A and B are uniformly distributed random variables, we have
+ * mean_a = (max_a + min_a) / 2, mean_b = (max_b + min_b ) / 2
+ * max_a/b and min_a/b can be calculated from scale_a/b and offset_a/b
+ * by substituting the data type minimum and maximum into the quantization equations
+ *
+ * We don't know mean_d because we have to choose it based on bias_fraction. If we call
+ * the integer summation M_int, then, similarly to the above, we have:
+ *
+ * E[C_int] = sum_k( E[A_k_int] * E[B_k_int] ) + E[D_int] = K * mean_a_int * mean_b_int + mean_d_int
+ * \___________________________/
+ * E[M_int]
+ *
+ * We choose a bias mean proportional to the integer summation. This proportion is "bias_fraction".
+ * So, we have D_int = f * M_int (f: fraction), and
+ * E[D_int] = mean_d_int = f * E[M_int]
+ *
+ * This also means, for floating point value of D, the following:
+ * E[D] = mean_d = E[D_int] * a_scale * b_scale
+ *
+ * 2) Calculate the variance:
+ * var(C) = sum_k( var(A_k * B_k) ) + var(D)
+ * = sum_k ( E[A_k^2 * B_k^2] - E[A_k]^2 * E[B_k]^2 )
+ * = ...
+ * = K * (var_a * var_b + var_a * mean^2_b + var_b * mean^2_a) + var_d
+ *
+ * Similarly, due to uniform random variable properties, we have
+ * var_a = (max_a - min_a)^2 / 12
+ * var_b = (max_b - min_b)^2 / 12
+ *
+ * Again, we don't know var_d as we don't know the bias. As set out in the previous section, we have
+ * var(D_int) = var(f * M_int) = f^2 * var(M_int)
+ *
+ * Using the same expression, we can find var(M_int):
+ * var(C_int) = sum_k( var(A_k_int * B_k_int) ) + var(D_int)
+ * = sum_k ( E[A_k_int^2 * B_k_int^2] - E[A_k_int]^2 * E[B_k_int]^2 )
+ * = ...
+ * = K * (var_a_int * var_b_int + var_a_int * mean^2_b_int + var_b_int * mean^2_a_int) + var_d_int
+ * \_______________________________________________________________________________/
+ * var(M_int)
+ *
+ * Now that we know the mean and variance of D_int, we can return a suitable bias range as
+ * [mean_d_int +/- num_sd * std_d_int]
+ *
+ * This also means, for floating point value of D, the following:
+ * var(D) = var_d = var(D_int) * a_scale^2 * b_scale^2
+ *
+ * E[D] and var(D) calculated in steps (1) and (2) can be substituted into the E[C] and var(C) calculations.
+ *
+ * 3) Now we have an idea of what an average C will look like and how much deviation
+ * is present around it. The exact distribution of C is hard to derive and depends on K.
+ * But, as K increases, the Central Limit Theorem makes it look more and more like a
+ * bell-shaped curve, approaching a normal distribution.
+ *
+ * This is useful because, in a normal distribution, values within +/- 2 standard deviations
+ * of the mean constitute about 95% of the values. This gives us a plausible range:
+ * C_range = [C_min, C_max] = [mean_c - 2 * std_c, mean_c + 2 * std_c]
+ *
+ * 4)
+ * If we map this [C_min, C_max] to [0, 255] or [-128, 127] depending on the signedness of the
+ * data type, we can find a suitable scale and offset for the output. On average, it's expected
+ * that 5% of the output values will saturate and 95% will remain in the range.
+ *
+ * The equations to be solved for offset_c and scale_c are:
+ * C_min = scale_c * (type_min - offset_c)
+ * C_max = scale_c * (type_max - offset_c)
+ */
+
+ const int32_t a_offset = a_q_info.uniform().offset;
+ const float a_scale = a_q_info.uniform().scale;
+ const int32_t b_offset = b_q_info.uniform().offset;
+ const float b_scale = b_q_info.uniform().scale;
+
+ // Integer value statistics. Valid for both Lhs/A and Rhs/B
+ const float mean_a_int = (t_max + t_min) / 2.f;
+ constexpr float var_a_int = (256 * 256 - 1) / 12.f; // Discrete uniform RV variance
+ const float mean_b_int = mean_a_int; // A_int and B_int have the same stats
+ constexpr float var_b_int = var_a_int;
+
+ // Lhs/A stats
+ const float max_a = (t_max - a_offset) * a_scale;
+ const float min_a = (t_min - a_offset) * a_scale;
+ const float mean_a = (max_a + min_a) / 2;
+ const float var_a = (max_a - min_a) * (max_a - min_a) / 12;
+
+ // Rhs/B stats
+ const float max_b = (t_max - b_offset) * b_scale;
+ const float min_b = (t_min - b_offset) * b_scale;
+ const float mean_b = (max_b + min_b) / 2;
+ const float var_b = (max_b - min_b) * (max_b - min_b) / 12;
+
+ // Integer multiplication output/M stats
+ const float mean_m_int = K * mean_a_int * mean_b_int;
+ const float var_m_int = K * (var_a_int * var_b_int + mean_a_int * mean_a_int * var_b_int + mean_b_int * mean_b_int * var_a_int);
+ const float std_m_int = sqrt(var_m_int);
+
+ // Bias/D both Int and Float statistics
+ const float mean_d_int = bias_fraction * mean_m_int;
+ const float std_d_int = bias_fraction * std_m_int;
+ const float mean_d = a_scale * b_scale * mean_d_int;
+ const float std_d = a_scale * b_scale * std_d_int;
+ const float var_d = std_d * std_d;
+
+ // Also calculate the suggested bias range
+ const int32_t min_bias = mean_d_int - (num_sd * std_d_int);
+ const int32_t max_bias = mean_d_int + (num_sd * std_d_int);
+
+ // Output/C stats
+ const float mean_out = K * mean_a * mean_b + mean_d;
+ const float var_out = K * (var_a * var_b + var_a * mean_b * mean_b + var_b * mean_a * mean_a) + var_d;
+ const float std_out = sqrt(var_out);
+
+ // Output quantization setup
+ const float scale_out = (2 * num_sd) * std_out / 255;
+ const int32_t offset_out = static_cast<int32_t>(t_min - (mean_out - (num_sd * std_out)) / scale_out);
+
+ c_q_info = QuantizationInfo(scale_out, offset_out);
+
+ return { c_q_info, min_bias, max_bias };
+}
+
template void get_tile(const SimpleTensor<float> &in, SimpleTensor<float> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<half> &in, SimpleTensor<half> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<int> &in, SimpleTensor<int> &roi, const Coordinates &coord);
@@ -361,6 +583,8 @@ template void transpose_matrix(const SimpleTensor<half> &in, SimpleTensor<half>
template void transpose_matrix(const SimpleTensor<int> &in, SimpleTensor<int> &out);
template void transpose_matrix(const SimpleTensor<short> &in, SimpleTensor<short> &out);
template void transpose_matrix(const SimpleTensor<char> &in, SimpleTensor<char> &out);
+template void transpose_matrix(const SimpleTensor<int8_t> &in, SimpleTensor<int8_t> &out);
+template void transpose_matrix(const SimpleTensor<uint8_t> &in, SimpleTensor<uint8_t> &out);
template void matrix_multiply(const SimpleTensor<float> &a, const SimpleTensor<float> &b, SimpleTensor<float> &out);
template void matrix_multiply(const SimpleTensor<half> &a, const SimpleTensor<half> &b, SimpleTensor<half> &out);
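To make steps (1)-(4) of the derivation above concrete, the following standalone sketch reproduces the same statistics as suggest_mac_dst_q_info_and_bias() for one set of example inputs. The scales, offsets, K and bias_fraction below are arbitrary illustrative assumptions, not values used by this patch.

// Illustrative sketch only: follows the derivation in the comment above,
// evaluated for assumed example parameters (QASYMM8, K = 64, bias_fraction = 0.5).
#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
    const int32_t t_min = 0, t_max = 255;          // QASYMM8 representable range
    const float   a_scale = 0.1f, b_scale = 0.2f;  // assumed input scales
    const int32_t a_offset = 10, b_offset = -10;   // assumed input offsets
    const int32_t K = 64;                          // accumulation depth
    const float   bias_fraction = 0.5f;            // bias amplitude relative to the integer accumulation
    const int     num_sd = 2;                      // +/- 2 standard deviations

    // Float-domain statistics of the uniformly distributed inputs A and B
    const float max_a = (t_max - a_offset) * a_scale, min_a = (t_min - a_offset) * a_scale;
    const float max_b = (t_max - b_offset) * b_scale, min_b = (t_min - b_offset) * b_scale;
    const float mean_a = (max_a + min_a) / 2, var_a = (max_a - min_a) * (max_a - min_a) / 12;
    const float mean_b = (max_b + min_b) / 2, var_b = (max_b - min_b) * (max_b - min_b) / 12;

    // Integer-domain statistics drive the suggested bias range
    const float mean_int   = (t_max + t_min) / 2.f;
    const float var_int    = (256.f * 256.f - 1.f) / 12.f;
    const float mean_m_int = K * mean_int * mean_int;
    const float var_m_int  = K * (var_int * var_int + 2.f * mean_int * mean_int * var_int);
    const float mean_d_int = bias_fraction * mean_m_int;
    const float std_d_int  = bias_fraction * std::sqrt(var_m_int);

    // Bias statistics converted back to the float domain
    const float mean_d = a_scale * b_scale * mean_d_int;
    const float var_d  = (a_scale * b_scale * std_d_int) * (a_scale * b_scale * std_d_int);

    // Output statistics and the resulting quantization parameters
    const float mean_c = K * mean_a * mean_b + mean_d;
    const float std_c  = std::sqrt(K * (var_a * var_b + var_a * mean_b * mean_b + var_b * mean_a * mean_a) + var_d);
    const float   scale_c  = (2 * num_sd) * std_c / 255;
    const int32_t offset_c = static_cast<int32_t>(t_min - (mean_c - num_sd * std_c) / scale_c);

    std::printf("dst q_info: scale=%f offset=%d, bias range=[%d, %d]\n", scale_c, offset_c,
                static_cast<int32_t>(mean_d_int - num_sd * std_d_int),
                static_cast<int32_t>(mean_d_int + num_sd * std_d_int));
    return 0;
}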
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
index 00e588e7b7..e044620556 100644
--- a/tests/validation/Helpers.h
+++ b/tests/validation/Helpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023,2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,16 +21,19 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_VALIDATION_HELPERS_H
-#define ARM_COMPUTE_TEST_VALIDATION_HELPERS_H
+#ifndef ACL_TESTS_VALIDATION_HELPERS_H
+#define ACL_TESTS_VALIDATION_HELPERS_H
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+
#include "support/Half.h"
#include "tests/Globals.h"
#include "tests/SimpleTensor.h"
-#include <math.h>
+#include <cmath>
+#include <cstdint>
#include <random>
#include <type_traits>
#include <utility>
@@ -50,6 +53,23 @@ template <>
struct is_floating_point<half> : public std::true_type
{
};
+template <>
+struct is_floating_point<bfloat16> : public std::true_type
+{
+};
+
+/** Helper struct to store the hints for
+ * - destination quantization info
+ * - minimum bias value
+ * - maximum bias value
+ * in quantized test construction.
+ */
+struct QuantizationHint
+{
+ QuantizationInfo q_info;
+ int32_t bias_min;
+ int32_t bias_max;
+};
/** Helper function to get the testing range for each activation layer.
*
@@ -63,13 +83,13 @@ std::pair<T, T> get_activation_layer_test_bounds(ActivationLayerInfo::Activation
{
std::pair<T, T> bounds;
- switch(data_type)
+ switch (data_type)
{
case DataType::F16:
{
using namespace half_float::literal;
- switch(activation)
+ switch (activation)
{
case ActivationLayerInfo::ActivationFunction::TANH:
case ActivationLayerInfo::ActivationFunction::SQUARE:
@@ -89,7 +109,7 @@ std::pair<T, T> get_activation_layer_test_bounds(ActivationLayerInfo::Activation
break;
}
case DataType::F32:
- switch(activation)
+ switch (activation)
{
case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
// Reduce range as exponent overflows
@@ -111,23 +131,6 @@ std::pair<T, T> get_activation_layer_test_bounds(ActivationLayerInfo::Activation
return bounds;
}
-/** Calculate output tensor shape give a vector of input tensor to concatenate
- *
- * @param[in] input_shapes Shapes of the tensors to concatenate across depth.
- *
- * @return The shape of output concatenated tensor.
- */
-TensorShape calculate_depth_concatenate_shape(const std::vector<TensorShape> &input_shapes);
-
-/** Calculate output tensor shape for the concatenate operation along a given axis
- *
- * @param[in] input_shapes Shapes of the tensors to concatenate across width.
- * @param[in] axis Axis to use for the concatenate operation
- *
- * @return The shape of output concatenated tensor.
- */
-TensorShape calculate_concatenate_shape(const std::vector<TensorShape> &input_shapes, size_t axis);
-
/** Convert an asymmetric quantized simple tensor into float using tensor quantization information.
*
* @param[in] src Quantized tensor.
@@ -142,6 +145,7 @@ SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<T> &src);
* @param[in] src Float tensor.
* @param[in] quantization_info Quantification information.
*
+ * \relates arm_compute::test::SimpleTensor
* @return Quantized tensor.
*/
template <typename T>
@@ -160,7 +164,7 @@ SimpleTensor<float> convert_from_symmetric(const SimpleTensor<T> &src);
*
* @param[in] src Float tensor.
* @param[in] quantization_info Quantification information.
- *
+ * \relates arm_compute::test::SimpleTensor
* @return Quantized tensor.
*/
template <typename T>
@@ -228,7 +232,8 @@ std::pair<int, int> get_quantized_qasymm8_signed_bounds(const QuantizationInfo &
* @param[in] max Floating point maximum value to be quantized
* @param[in] channel_id Channel id for per channel quantization info.
*/
-std::pair<int, int> get_symm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id = 0);
+std::pair<int, int>
+get_symm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id = 0);
/** Add random padding along the X axis (between 1 and 16 columns per side) to all the input tensors.
* This is used in our validation suite in order to simulate implicit padding addition after configuring, but before allocating.
@@ -239,8 +244,71 @@ std::pair<int, int> get_symm_quantized_per_channel_bounds(const QuantizationInfo
*
* @note This function adds padding to the input tensors only if data_layout == DataLayout::NHWC
*/
-void add_padding_x(std::initializer_list<ITensor *> tensors, const DataLayout &data_layout = DataLayout::NHWC, bool only_right_pad = false);
+void add_padding_x(std::initializer_list<ITensor *> tensors,
+ const DataLayout &data_layout = DataLayout::NHWC,
+ bool only_right_pad = false);
+
+/** For a 2D convolution, given the input/weights quantization information and the convolution dimensions,
+ * calculate a suitable output quantization and suggested bias range for obtaining non-saturated outputs with high probability.
+ *
+ * @param[in] in_q_info Input matrix quantization info
+ * @param[in] weight_q_info Weights matrix quantization info
+ * @param[in] height Height of the weights tensor
+ * @param[in] width Width of the weights tensor
+ * @param[in] channels Number of input channels
+ * @param[in] data_type Data type; only QASYMM8 and QASYMM8_SIGNED are supported
+ * @param[in] bias_fraction See @ref suggest_mac_dst_q_info_and_bias() for an explanation
+ *
+ * @return QuantizationHint object containing the suggested output quantization info and min/max bias range
+ */
+QuantizationHint suggest_conv_dst_q_info_and_bias(const QuantizationInfo &in_q_info,
+ const QuantizationInfo &weight_q_info,
+ int32_t height,
+ int32_t width,
+ int32_t channels,
+ DataType data_type,
+ float bias_fraction);
+
+/** For a matrix multiplication, given the Lhs/Rhs matrix quantization information and the matrix multiplication dimensions,
+ * calculate a suitable output quantization and suggested bias range for obtaining non-saturated outputs with high probability.
+ *
+ * @param[in] lhs_q_info Lhs matrix quantization info
+ * @param[in] rhs_q_info Rhs matrix quantization info
+ * @param[in] m Number of rows of Lhs matrix
+ * @param[in] n Number of columns of Rhs Matrix
+ * @param[in] k Number of rows/columns of Rhs/Lhs Matrix
+ * @param[in] data_type Data type; only QASYMM8 and QASYMM8_SIGNED are supported
+ * @param[in] bias_fraction See @ref suggest_mac_dst_q_info_and_bias() for an explanation
+ *
+ * @return QuantizationHint object containing the suggested output quantization info and min/max bias range
+ */
+QuantizationHint suggest_matmul_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info,
+ const QuantizationInfo &rhs_q_info,
+ int32_t m,
+ int32_t n,
+ int32_t k,
+ DataType data_type,
+ float bias_fraction);
+
+/** For a multiply-accumulate (mac), given the Lhs/Rhs vector quantization information and the dot product length,
+ * calculate a suitable output quantization and suggested bias range for obtaining non-saturated outputs with high probability.
+ *
+ * @param[in] lhs_q_info Lhs matrix quantization info
+ * @param[in] rhs_q_info Rhs matrix quantization info
+ * @param[in] k Number of accumulations taking place in the sum, i.e. c = sum_k(a_k * b_k)
+ * @param[in] data_type Data type; only QASYMM8 and QASYMM8_SIGNED are supported
+ * @param[in] bias_fraction The fraction of the bias amplitude compared to the integer accumulation.
+ * @param[in] num_sd (Optional) number of standard deviations we allow from the mean. Default value is 2.
+ *
+ * @return QuantizationHint object containing the suggested output quantization info and min/max bias range
+ */
+QuantizationHint suggest_mac_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info,
+ const QuantizationInfo &rhs_q_info,
+ int32_t k,
+ DataType data_type,
+ float bias_fraction,
+ int num_sd = 2);
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_VALIDATION_HELPERS_H */
+#endif // ACL_TESTS_VALIDATION_HELPERS_H
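As a usage illustration of the declarations above, a quantized fixture could pick its destination quantization and draw its bias values from the returned hint. The sketch below is a hypothetical, simplified example: the shapes, quantization parameters and the commented-out tensor creation are assumptions, not part of this patch.

// Hypothetical usage sketch (not part of the patch): consume a QuantizationHint
// when setting up a quantized matmul test. Shapes and q-infos are assumed values.
#include "arm_compute/core/Types.h"
#include "tests/validation/Helpers.h"

#include <cstdio>
#include <random>

using namespace arm_compute;
using namespace arm_compute::test::validation;

void setup_quantized_matmul(int m, int n, int k)
{
    const QuantizationInfo lhs_q(0.1f, 10);  // assumed Lhs quantization
    const QuantizationInfo rhs_q(0.2f, -10); // assumed Rhs quantization

    // Ask the helper for an output quantization that rarely saturates and
    // for a bias range consistent with bias_fraction = 0.5
    const QuantizationHint hint =
        suggest_matmul_dst_q_info_and_bias(lhs_q, rhs_q, m, n, k, DataType::QASYMM8_SIGNED, 0.5f);

    // The destination tensor would be created with hint.q_info, e.g.:
    //   dst = create_tensor<Tensor>(TensorShape(n, m), DataType::QASYMM8_SIGNED, 1, hint.q_info);

    // Bias values are drawn uniformly from the suggested range
    std::mt19937 gen(0);
    std::uniform_int_distribution<int32_t> bias_dist(hint.bias_min, hint.bias_max);
    const int32_t example_bias = bias_dist(gen);

    std::printf("dst scale=%f offset=%d, example bias=%d\n",
                hint.q_info.uniform().scale, hint.q_info.uniform().offset, example_bias);
}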
diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp
index 111e969bae..73f5de68ac 100644
--- a/tests/validation/NEON/ActivationLayer.cpp
+++ b/tests/validation/NEON/ActivationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,10 +23,13 @@
*/
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Traits.h"
+#include "arm_compute/core/utils/StringUtils.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/RuntimeContext.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuActivationKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/ActivationFunctionsDataset.h"
@@ -37,7 +40,8 @@
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/ActivationLayerFixture.h"
-#include "support/Requires.h"
+#include "arm_compute/Acl.hpp"
+#include "support/AclRequires.h"
namespace arm_compute
{
@@ -65,6 +69,8 @@ RelativeTolerance<float> relative_tolerance(DataType data_type, ActivationLayerI
case ActivationLayerInfo::ActivationFunction::SQRT:
case ActivationLayerInfo::ActivationFunction::TANH:
case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
+ case ActivationLayerInfo::ActivationFunction::SWISH:
+ case ActivationLayerInfo::ActivationFunction::GELU:
switch(data_type)
{
case DataType::F16:
@@ -107,6 +113,7 @@ AbsoluteTolerance<float> absolute_tolerance(DataType data_type, ActivationLayerI
case ActivationLayerInfo::ActivationFunction::LOGISTIC:
case ActivationLayerInfo::ActivationFunction::SQRT:
case ActivationLayerInfo::ActivationFunction::TANH:
+ case ActivationLayerInfo::ActivationFunction::SWISH:
case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
switch(data_type)
{
@@ -169,7 +176,8 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
DataType::F32,
});
-const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions(), framework::dataset::make("ActivationFunction", ActivationLayerInfo::ActivationFunction::HARD_SWISH));
+const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions(),
+ framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::HARD_SWISH, ActivationLayerInfo::ActivationFunction::SWISH }));
/** Input data sets. */
const auto ActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), NeonActivationFunctionsDataset), framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
@@ -215,6 +223,48 @@ void test_float_sqrt_boundary_value()
TEST_SUITE(NEON)
TEST_SUITE(ActivationLayer)
+/** Test case for the operator C++ API (@ref acl::Activation).
+ *
+ * Create an activation operator through the public API, feed it with tensors and execute it once.
+ *
+ * Checks performed in order:
+ * - All API calls return acl::StatusCode::Success
+ */
+TEST_CASE(ActivationAPI, framework::DatasetMode::ALL)
+{
+ acl::StatusCode err = acl::StatusCode::Success;
+
+ // Create context & Queue
+ acl::Context ctx(acl::Target::Cpu, &err);
+ ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success);
+
+ acl::Queue queue(ctx, &err);
+ ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success);
+
+ // Create activation operator
+ acl::TensorDescriptor src_info({ 2, 3 }, acl::DataType::Float32);
+ acl::TensorDescriptor dst_info({ 2, 3 }, acl::DataType::Float32);
+ acl::ActivationDesc desc{ AclRelu, 6.f, 0.f, false };
+
+ acl::Activation act(ctx, src_info, dst_info, desc, &err);
+ ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success);
+
+ // Create tensors and feed
+ acl::Tensor src(ctx, src_info, &err);
+ ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success);
+ acl::Tensor dst(ctx, dst_info, &err);
+ ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success);
+
+ acl::TensorPack pack(ctx);
+ err = pack.add(src, ACL_SRC);
+ err = pack.add(dst, ACL_DST);
+ ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success);
+
+ // Execute operator
+ err = act.run(queue, pack);
+ ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success);
+}
+
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
@@ -236,6 +286,49 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
bool is_valid = bool(NEActivationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), act_info));
ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
}
+
+DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat(
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+ DataType::QSYMM16
+ })),
+ combine(framework::dataset::make("CpuExt", std::string("SVE")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ }))),
+ combine(framework::dataset::make("CpuExt", std::string("SVE2")),
+ framework::dataset::make("DataType", { DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+ DataType::QSYMM16
+ }))),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.sve = (cpu_ext == "SVE");
+ cpu_isa.sve2 = (cpu_ext == "SVE2");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuActivationKernel::get_implementation(ActivationDataTypeISASelectorData{data_type, CPUModel::GENERIC, cpu_isa, ActivationLayerInfo::ActivationFunction::BOUNDED_RELU}, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_activation";
+ if( data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED)
+ {
+#ifdef __aarch64__
+ expected = "neon_q8_activation_lut";
+#else // __aarch64__
+ expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_activation";
+#endif // __aarch64__
+ }
+ std::string actual = selected_impl->name;
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
// clang-format on
// *INDENT-ON*
@@ -316,9 +409,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerQuantizedFixture<int8_t>, fram
TEST_SUITE_END() // QASYMM8_SIGNED
/** Input data sets. */
-const auto Int16QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::LOGISTIC,
- ActivationLayerInfo::ActivationFunction::TANH
- });
+const auto Int16QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationFunction",
+{
+ ActivationLayerInfo::ActivationFunction::LOGISTIC,
+ ActivationLayerInfo::ActivationFunction::TANH,
+ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+});
const auto Int16QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false }), Int16QuantizedActivationFunctionsDataset),
framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
diff --git a/tests/validation/NEON/AddMulAdd.cpp b/tests/validation/NEON/AddMulAdd.cpp
new file mode 100644
index 0000000000..77e3d80fe6
--- /dev/null
+++ b/tests/validation/NEON/AddMulAdd.cpp
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef __aarch64__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEAddMulAdd.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "tests/NEON/Accessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/AddMulAddFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
+const AbsoluteTolerance<half> tolerance_fp16(half(0.1f)); /**< Tolerance for 16-bit floating point tests */
+constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance for quantized tests */
+
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+
+ // Boundaries are aligned with Quantized Data ranges -- DOUBLE check before changing
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, -2.f)
+});
+
+// QASYMM8 test quantizations
+const auto qasymm8_input1_qinfo_set = framework::dataset::make("Input1QInfo", { QuantizationInfo(0.1, 10) }); // Representable Range: [-1, 24.5]
+const auto qasymm8_input2_qinfo_set = framework::dataset::make("Input2QInfo", { QuantizationInfo(0.2, 60) }); // Representable Range: [-12, 39]
+const auto qasymm8_bn_mul_qinfo_set = framework::dataset::make("BnMulInfo", { QuantizationInfo(0.001, 55) }); // Representable Range: [-0.11, 0.2]
+const auto qasymm8_bn_add_qinfo_set = framework::dataset::make("BnAddInfo", { QuantizationInfo(0.02, 20) }); // Representable Range: [-0.4, 4.7]
+
+// Representable Range: [-9.36, 51.84], Expected F32 range: [-13, 63.5], leaving some space for saturation
+const auto qasymm8_add_output_qinfo_set = framework::dataset::make("AddOutputInfo", { QuantizationInfo(0.24, 39) });
+
+// Representable Range: [-4.8, 10.5], Expected FP32 range: [-6.985, 12.7], leaving some space for saturation
+// This range also makes sense with the activation boundaries above, i.e. [-2, 8] for LU_BOUNDED_RELU and [0, 6] for BOUNDED_RELU
+const auto qasymm8_final_output_qinfo_set = framework::dataset::make("FinalOutputInfo", { QuantizationInfo(0.06, 80) });
+
+// QASYMM8_SIGNED test quantizations
+const auto qasymm8_signed_input1_qinfo_set = framework::dataset::make("Input1QInfo", { QuantizationInfo(0.1, 10) }); // Representable Range: [-13.8, 11.7]
+const auto qasymm8_signed_input2_qinfo_set = framework::dataset::make("Input2QInfo", { QuantizationInfo(0.2, -60) }); // Representable Range: [-13.6, 39.4]
+const auto qasymm8_signed_bn_mul_qinfo_set = framework::dataset::make("BnMulInfo", { QuantizationInfo(0.001, 55) }); // Representable Range: [-0.183, 0.072]
+const auto qasymm8_signed_bn_add_qinfo_set = framework::dataset::make("BnAddInfo", { QuantizationInfo(0.4, -120) }); // Representable Range: [-0.32, 9.08]
+
+// Representable Range: [-21.36, 39.84], Expected F32 range: [-27.4, 51.1], leaving some space for saturation
+const auto qasymm8_signed_add_output_qinfo_set = framework::dataset::make("AddOutputInfo", { QuantizationInfo(0.24, -39) });
+
+// Representable Range: [-4.8, 10.5], Expected FP32 range: [-9.6713, 14.0942], leaving some space for saturation
+// This range also makes sense with the activation boundaries above, i.e. [-2, 8] for LU_BOUNDED_RELU and [0, 6] for BOUNDED_RELU
+const auto qasymm8_signed_final_output_qinfo_set = framework::dataset::make("FinalOutputInfo", { QuantizationInfo(0.06, -48) });
+
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(AddMulAdd)
+
+template <typename T>
+using NEAddMulAddFloatFixture = AddMulAddFloatValidationFixture<Tensor, Accessor, NEAddMulAdd, T, true>;
+
+template <typename T>
+using NEAddMulAddFloatFixtureWoIntermOut = AddMulAddFloatValidationFixture<Tensor, Accessor, NEAddMulAdd, T, false>;
+
+TEST_SUITE(Float)
+
+TEST_SUITE(F32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulAddFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::F32)),
+ ActivationFunctionsDataset))
+{
+ // Validate outputs
+ validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance
+ validate(Accessor(_target), _reference, tolerance_fp32);
+}
+
+// This test is to stress the case when there is no intermediate output required (i.e. nullptr)
+FIXTURE_DATA_TEST_CASE(RunSmallWithoutIntermOutput, NEAddMulAddFloatFixtureWoIntermOut<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })))
+{
+ // Validate outputs
+ validate(Accessor(_target), _reference, tolerance_fp32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulAddFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+ framework::dataset::make("DataType", DataType::F32)),
+ ActivationFunctionsDataset))
+{
+ // Validate outputs
+ validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance
+ validate(Accessor(_target), _reference, tolerance_fp32);
+}
+
+TEST_SUITE_END() // F32
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(F16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulAddFloatFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::F16)),
+ ActivationFunctionsDataset))
+{
+ // Validate outputs
+ validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance
+ validate(Accessor(_target), _reference, tolerance_fp16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulAddFloatFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+ framework::dataset::make("DataType", DataType::F16)),
+ ActivationFunctionsDataset))
+{
+ // Validate outputs
+ validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance
+ validate(Accessor(_target), _reference, tolerance_fp16);
+}
+TEST_SUITE_END() // F16
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+TEST_SUITE_END() // Float
+
+template <typename T>
+using NEAddMulQuantizedFixture = AddMulAddQuantizedValidationFixture<Tensor, Accessor, NEAddMulAdd, T, true>;
+
+template <typename T>
+using NEAddMulAddQuantizedFixtureWoIntermOut = AddMulAddQuantizedValidationFixture<Tensor, Accessor, NEAddMulAdd, T, false>;
+
+TEST_SUITE(Quantized)
+
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ ActivationFunctionsDataset),
+ qasymm8_input1_qinfo_set),
+ qasymm8_input2_qinfo_set),
+ qasymm8_bn_mul_qinfo_set),
+ qasymm8_bn_add_qinfo_set),
+ qasymm8_add_output_qinfo_set),
+ qasymm8_final_output_qinfo_set))
+{
+ // Validate outputs
+ validate(Accessor(_interm_target), _interm_reference, tolerance_quant);
+ validate(Accessor(_target), _reference, tolerance_quant);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(datasets::LargeShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ ActivationFunctionsDataset),
+ qasymm8_input1_qinfo_set),
+ qasymm8_input2_qinfo_set),
+ qasymm8_bn_mul_qinfo_set),
+ qasymm8_bn_add_qinfo_set),
+ qasymm8_add_output_qinfo_set),
+ qasymm8_final_output_qinfo_set))
+{
+ // Validate outputs
+ validate(Accessor(_interm_target), _interm_reference, tolerance_quant);
+ validate(Accessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ ActivationFunctionsDataset),
+ qasymm8_signed_input1_qinfo_set),
+ qasymm8_signed_input2_qinfo_set),
+ qasymm8_signed_bn_mul_qinfo_set),
+ qasymm8_signed_bn_add_qinfo_set),
+ qasymm8_signed_add_output_qinfo_set),
+ qasymm8_signed_final_output_qinfo_set))
+{
+ // Validate outputs
+ validate(Accessor(_interm_target), _interm_reference, tolerance_quant);
+ validate(Accessor(_target), _reference, tolerance_quant);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(datasets::LargeShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ ActivationFunctionsDataset),
+ qasymm8_signed_input1_qinfo_set),
+ qasymm8_signed_input2_qinfo_set),
+ qasymm8_signed_bn_mul_qinfo_set),
+ qasymm8_signed_bn_add_qinfo_set),
+ qasymm8_signed_add_output_qinfo_set),
+ qasymm8_signed_final_output_qinfo_set))
+{
+ // Validate outputs
+ validate(Accessor(_interm_target), _interm_reference, tolerance_quant);
+ validate(Accessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE_END() // Quantized
+
+TEST_SUITE_END() // AddMulAdd
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif // __aarch64__
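The "Representable Range" comments in the quantization setups of the AddMulAdd tests above follow the standard asymmetric dequantization formula, range = [scale * (q_min - offset), scale * (q_max - offset)]. The small check below is illustrative only; it plugs in the Input1QInfo values from the QASYMM8 set above.

// Illustrative check of a "Representable Range" comment:
// asymmetric 8-bit quantization covers [scale*(q_min - offset), scale*(q_max - offset)].
#include <cstdio>

int main()
{
    const float scale  = 0.1f; // Input1QInfo scale from the QASYMM8 set above
    const int   offset = 10;   // Input1QInfo offset
    const int   q_min = 0, q_max = 255;

    std::printf("[%g, %g]\n", scale * (q_min - offset), scale * (q_max - offset)); // prints [-1, 24.5]
    return 0;
}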
diff --git a/tests/validation/NEON/ArgMinMax.cpp b/tests/validation/NEON/ArgMinMax.cpp
index 0a4071076a..91b8128dea 100644
--- a/tests/validation/NEON/ArgMinMax.cpp
+++ b/tests/validation/NEON/ArgMinMax.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,6 +43,27 @@ namespace test
{
namespace validation
{
+namespace
+{
+const auto OpsDataset = framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX });
+const auto AxisDataset = framework::dataset::make("Axis", { 0, 1, 2, 3 });
+const auto QInfoDataset = framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) });
+
+const auto ArgMinMaxSmallDatasetAxis0 = framework::dataset::make("Shape",
+{
+ TensorShape{ 1U, 5U },
+ TensorShape{ 2U, 3U },
+ TensorShape{ 1U },
+ TensorShape{ 3U },
+ TensorShape{ 2U },
+ TensorShape{ 5U },
+ TensorShape{ 17U },
+ TensorShape{ 15U, 2U },
+});
+using ArgMinMaxSmallDataset = datasets::Small4DShapes;
+using ArgMinMaxLargeDataset = datasets::Large4DShapes;
+} // namespace
+
TEST_SUITE(NEON)
TEST_SUITE(ArgMinMax)
@@ -70,23 +91,50 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
// clang-format on
// *INDENT-ON*
-template <typename T>
-using NEArgMinMaxValidationFixture = ArgMinMaxValidationFixture<Tensor, Accessor, NEArgMinMaxLayer, T>;
+template <typename T1, typename T2>
+using NEArgMinMaxValidationFixture = ArgMinMaxValidationFixture<Tensor, Accessor, NEArgMinMaxLayer, T1, T2>;
+
+using NEArgMinMaxValidationFixture_S32_S32 = NEArgMinMaxValidationFixture<int32_t, int32_t>;
+using NEArgMinMaxValidationFixture_F16_S32 = NEArgMinMaxValidationFixture<half, int32_t>;
+using NEArgMinMaxValidationFixture_F32_S32 = NEArgMinMaxValidationFixture<float, int32_t>;
+#ifdef __aarch64__
+using NEArgMinMaxValidationFixture_F32_S64 = NEArgMinMaxValidationFixture<float, int64_t>;
+#endif // __aarch64__
TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall,
- NEArgMinMaxValidationFixture<int32_t>,
+FIXTURE_DATA_TEST_CASE(RunSmallAxis0,
+ NEArgMinMaxValidationFixture_S32_S32,
framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+ combine(combine(combine(combine(ArgMinMaxSmallDatasetAxis0,
+ framework::dataset::make("DataTypeIn", DataType::S32)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ framework::dataset::make("Axis", { 0 })),
+ OpsDataset))
{
// Validate output
validate(Accessor(_target), _reference);
}
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ NEArgMinMaxValidationFixture_S32_S32,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(ArgMinMaxSmallDataset(),
+ framework::dataset::make("DataTypeIn", DataType::S32)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
FIXTURE_DATA_TEST_CASE(RunLarge,
- NEArgMinMaxValidationFixture<int32_t>,
+ NEArgMinMaxValidationFixture_S32_S32,
framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+ combine(combine(combine(combine(ArgMinMaxLargeDataset(),
+ framework::dataset::make("DataTypeIn", DataType::S32)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -97,18 +145,26 @@ TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall,
- NEArgMinMaxValidationFixture<half>,
+ NEArgMinMaxValidationFixture_F16_S32,
framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+ combine(combine(combine(combine(ArgMinMaxSmallDataset(),
+ framework::dataset::make("DataTypeIn", DataType::F16)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge,
- NEArgMinMaxValidationFixture<half>,
+ NEArgMinMaxValidationFixture_F16_S32,
framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+ combine(combine(combine(combine(ArgMinMaxLargeDataset(),
+ framework::dataset::make("DataTypeIn", DataType::F16)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -118,18 +174,41 @@ TEST_SUITE_END() // FP16
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall,
- NEArgMinMaxValidationFixture<float>,
+ NEArgMinMaxValidationFixture_F32_S32,
framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+ combine(combine(combine(combine(ArgMinMaxSmallDataset(),
+ framework::dataset::make("DataTypeIn", DataType::F32)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset))
{
// Validate output
validate(Accessor(_target), _reference);
}
+#ifdef __aarch64__
+FIXTURE_DATA_TEST_CASE(RunSmall_F32_S64,
+ NEArgMinMaxValidationFixture_F32_S64,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(ArgMinMaxSmallDataset(),
+ framework::dataset::make("DataTypeIn", DataType::F32)),
+ framework::dataset::make("DataTypeOut", DataType::S64)),
+ AxisDataset),
+ OpsDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+#endif // __aarch64__
+
FIXTURE_DATA_TEST_CASE(RunLarge,
- NEArgMinMaxValidationFixture<float>,
+ NEArgMinMaxValidationFixture_F32_S32,
framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+ combine(combine(combine(combine(ArgMinMaxLargeDataset(),
+ framework::dataset::make("DataTypeIn", DataType::F32)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -137,27 +216,35 @@ FIXTURE_DATA_TEST_CASE(RunLarge,
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
-template <typename T>
-using NEArgMinMaxQuantizedValidationFixture = ArgMinMaxValidationQuantizedFixture<Tensor, Accessor, NEArgMinMaxLayer, T>;
+template <typename T1, typename T2>
+using NEArgMinMaxQuantizedValidationFixture = ArgMinMaxValidationQuantizedFixture<Tensor, Accessor, NEArgMinMaxLayer, T1, T2>;
+
+using NEArgMinMaxQuantizedValidationFixture_U8_S32 = NEArgMinMaxQuantizedValidationFixture<uint8_t, int32_t>;
+using NEArgMinMaxQuantizedValidationFixture_S8_S32 = NEArgMinMaxQuantizedValidationFixture<int8_t, int32_t>;
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall,
- NEArgMinMaxQuantizedValidationFixture<uint8_t>,
+ NEArgMinMaxQuantizedValidationFixture_U8_S32,
framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })))
+ combine(combine(combine(combine(combine(ArgMinMaxSmallDataset(),
+ framework::dataset::make("DataTypeIn", DataType::QASYMM8)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset),
+ QInfoDataset))
{
// Validate output
validate(Accessor(_target), _reference);
}
-
FIXTURE_DATA_TEST_CASE(RunLarge,
- NEArgMinMaxQuantizedValidationFixture<uint8_t>,
+ NEArgMinMaxQuantizedValidationFixture_U8_S32,
framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })))
+ combine(combine(combine(combine(combine(ArgMinMaxLargeDataset(),
+ framework::dataset::make("DataTypeIn", DataType::QASYMM8)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset),
+ QInfoDataset))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -166,22 +253,27 @@ TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
FIXTURE_DATA_TEST_CASE(RunSmall,
- NEArgMinMaxQuantizedValidationFixture<int8_t>,
+ NEArgMinMaxQuantizedValidationFixture_S8_S32,
framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 127.f, 20) })))
+ combine(combine(combine(combine(combine(ArgMinMaxSmallDataset(),
+ framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset),
+ QInfoDataset))
{
// Validate output
validate(Accessor(_target), _reference);
}
-
FIXTURE_DATA_TEST_CASE(RunLarge,
- NEArgMinMaxQuantizedValidationFixture<int8_t>,
+ NEArgMinMaxQuantizedValidationFixture_S8_S32,
framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 127.f, 20) })))
+ combine(combine(combine(combine(combine(ArgMinMaxLargeDataset(),
+ framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataTypeOut", DataType::S32)),
+ AxisDataset),
+ OpsDataset),
+ QInfoDataset))
{
// Validate output
validate(Accessor(_target), _reference);
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index ea6656eefe..535c3e634e 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,9 +22,12 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/StringUtils.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuAddKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/ConvertPolicyDataset.h"
@@ -48,26 +51,8 @@ constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance value for
#else // !defined(__aarch64__) || defined(ENABLE_SVE)
constexpr AbsoluteTolerance<float> tolerance_quant(0);
#endif // !defined(__aarch64__) || defined(ENABLE_SVE)
-
-/** Input data sets **/
-const auto ArithmeticAdditionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType",
- DataType::U8));
-const auto ArithmeticAdditionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
- framework::dataset::make("DataType", DataType::S16));
-const auto ArithmeticAdditionS32Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S32 }), framework::dataset::make("DataType", DataType::S32)),
- framework::dataset::make("DataType", DataType::S32));
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto ArithmeticAdditionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
- framework::dataset::make("DataType", DataType::F16));
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const auto ArithmeticAdditionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("DataType", DataType::F32));
-const auto ArithmeticAdditionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("DataType", DataType::QASYMM8));
-const auto ArithmeticAdditionQASYMM8SIGNEDDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED));
-const auto ArithmeticAdditionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
- framework::dataset::make("DataType", DataType::QSYMM16));
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
} // namespace
TEST_SUITE(NEON)
@@ -79,25 +64,22 @@ using NEArithmeticAdditionFixture = ArithmeticAdditionValidationFixture<Tensor,
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Unsupported broadcast
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),// Mismatching shapes
}),
- framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(1U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, false, false, false})),
+ framework::dataset::make("Expected", { true, false, false, false})),
input1_info, input2_info, output_info, expected)
{
Status s = NEArithmeticAddition::validate(&input1_info.clone()->set_is_resizable(false),
@@ -106,6 +88,63 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
ConvertPolicy::WRAP);
ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS);
}
+
+DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat(
+ combine(combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::U8,
+ DataType::S16,
+ DataType::S32,
+ DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+ DataType::QSYMM16
+ })),
+ framework::dataset::make("CanUseFixedpoint", {true, false})),
+ combine(combine(framework::dataset::make("CpuExt", std::string("SVE")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::U8,
+ DataType::S16,
+ DataType::S32
+ })),
+ framework::dataset::make("CanUseFixedpoint", {true, false}))),
+ combine(combine(framework::dataset::make("CpuExt", std::string("SVE2")),
+ framework::dataset::make("DataType", { DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+ DataType::QSYMM16
+ })),
+ framework::dataset::make("CanUseFixedpoint", {true, false}))),
+ cpu_ext, data_type, can_use_fixedpoint)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.sve = (cpu_ext == "SVE");
+ cpu_isa.sve2 = (cpu_ext == "SVE2");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuAddKernel::get_implementation(CpuAddKernelDataTypeISASelectorData{data_type, cpu_isa, can_use_fixedpoint}, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ bool qasymm8_any = (data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED);
+
+ std::string expected;
+ if(qasymm8_any && can_use_fixedpoint)
+ {
+ expected = "neon_" + cpu_impl_dt(data_type) + "_add_fixedpoint";
+ }
+ else
+ {
+ expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_add";
+ }
+
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
// clang-format on
// *INDENT-ON*
@@ -127,8 +166,10 @@ TEST_CASE(NoPaddingAdded, framework::DatasetMode::PRECOMMIT)
TEST_SUITE(Integer)
TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionU8Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::U8)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -136,15 +177,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<uint8_t>, framework
TEST_SUITE_END() // U8
TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::S16)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticAdditionS16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+ DataType::S16)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -152,8 +197,10 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<int16_t>, framework
TEST_SUITE_END() // S16
TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int32_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS32Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int32_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::S32)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -164,8 +211,9 @@ TEST_SUITE_END() // Integer
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(F16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -174,15 +222,19 @@ TEST_SUITE_END() // F16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE(F32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP32Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticAdditionFP32Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -191,17 +243,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<float>, framework::
template <typename T>
using NEArithmeticAdditionBroadcastFixture = ArithmeticAdditionBroadcastValidationFixture<Tensor, Accessor, NEArithmeticAddition, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapesBroadcast(),
- ArithmeticAdditionFP32Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapesBroadcast(),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapesBroadcast(),
- ArithmeticAdditionFP32Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapesBroadcast(),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -220,11 +274,12 @@ TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall,
NEArithmeticAdditionQuantizedFixture<uint8_t>,
framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQASYMM8Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
- framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
- framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+ combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_quant);
@@ -235,22 +290,24 @@ TEST_SUITE(QASYMM8_SIGNED)
FIXTURE_DATA_TEST_CASE(RunSmall,
NEArithmeticAdditionQuantizedFixture<int8_t>,
framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQASYMM8SIGNEDDataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
- framework::dataset::make("Src0QInfo", { QuantizationInfo(0.5f, 20) })),
- framework::dataset::make("Src1QInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(0.5f, 5) })))
+ combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(0.5f, 20) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(0.5f, 10) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(0.5f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_quant);
}
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticAdditionQuantizedBroadcastFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(
- datasets::SmallShapesBroadcast(), ArithmeticAdditionQASYMM8SIGNEDDataset),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticAdditionQuantizedBroadcastFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(
+ datasets::SmallShapesBroadcast(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
framework::dataset::make("Src0QInfo", { QuantizationInfo(0.5f, 20) })),
framework::dataset::make("Src1QInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(0.5f, 5) })))
+ framework::dataset::make("OutQInfo", { QuantizationInfo(0.5f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_quant);
@@ -261,11 +318,12 @@ TEST_SUITE(QSYMM16)
FIXTURE_DATA_TEST_CASE(RunSmall,
NEArithmeticAdditionQuantizedFixture<int16_t>,
framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQSYMM16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
- framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
- framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
- framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
+ combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_quant);
diff --git a/tests/validation/NEON/ArithmeticSubtraction.cpp b/tests/validation/NEON/ArithmeticSubtraction.cpp
index 7a36893445..8886ca2db5 100644
--- a/tests/validation/NEON/ArithmeticSubtraction.cpp
+++ b/tests/validation/NEON/ArithmeticSubtraction.cpp
@@ -50,45 +50,16 @@ constexpr AbsoluteTolerance<float> tolerance_qasymm8(1); /**< Tolerance value fo
#endif //__aarch64__
constexpr AbsoluteTolerance<int16_t> tolerance_qsymm16(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
-/** Input data sets **/
-const auto ArithmeticSubtractionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("DataType", DataType::QASYMM8));
-
-const auto ArithmeticSubtractionQASYMM8SIGNEDDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED));
-
-const auto ArithmeticSubtractionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16),
- framework::dataset::make("DataType", DataType::QSYMM16)),
- framework::dataset::make("DataType", DataType::QSYMM16));
-
-const auto ArithmeticSubtractionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8),
- framework::dataset::make("DataType", DataType::U8)),
- framework::dataset::make("DataType", DataType::U8));
-
-const auto ArithmeticSubtractionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }),
- framework::dataset::make("DataType", DataType::S16)),
- framework::dataset::make("DataType", DataType::S16));
-
-const auto ArithmeticSubtractionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32),
- framework::dataset::make("DataType", DataType::S32)),
- framework::dataset::make("DataType", DataType::S32));
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto ArithmeticSubtractionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16),
- framework::dataset::make("DataType", DataType::F16)),
- framework::dataset::make("DataType", DataType::F16));
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const auto ArithmeticSubtractionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32),
- framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("DataType", DataType::F32));
-
+// Quantization Information DataSet
const auto ArithmeticSubtractionQuantizationInfoDataset = combine(combine(framework::dataset::make("QuantizationInfoIn1", { QuantizationInfo(10, 120) }),
framework::dataset::make("QuantizationInfoIn2", { QuantizationInfo(20, 110) })),
framework::dataset::make("QuantizationInfoOut", { QuantizationInfo(15, 125) }));
const auto ArithmeticSubtractionQuantizationInfoSignedDataset = combine(combine(framework::dataset::make("QuantizationInfoIn1", { QuantizationInfo(0.5f, 10) }),
framework::dataset::make("QuantizationInfoIn2", { QuantizationInfo(0.5f, 20) })),
framework::dataset::make("QuantizationInfoOut", { QuantizationInfo(0.5f, 50) }));
+const auto ArithmeticSubtractionQuantizationInfoSignedInPlaceDataset = combine(combine(framework::dataset::make("QuantizationInfoIn1", { QuantizationInfo(0.8f, 10) }),
+ framework::dataset::make("QuantizationInfoIn2", { QuantizationInfo(0.8f, 10) })),
+ framework::dataset::make("QuantizationInfoOut", { QuantizationInfo(0.8f, 10) }));
const auto ArithmeticSubtractionQuantizationInfoSymmetric = combine(combine(framework::dataset::make("QuantizationInfoIn1", { QuantizationInfo(0.3f, 0) }),
framework::dataset::make("QuantizationInfoIn2", { QuantizationInfo(0.7f, 0) })),
framework::dataset::make("QuantizationInfoOut", { QuantizationInfo(0.2f, 0) }));
@@ -105,35 +76,31 @@ using NEArithmeticSubtractionFixture = ArithmeticSubtractionValidationFixture<Te
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
- framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::QASYMM8), // Mismatching types
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Invalid convert policy
}),
- framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
- TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
})),
- framework::dataset::make("ConvertPolicy",{ ConvertPolicy::WRAP,
- ConvertPolicy::SATURATE,
- ConvertPolicy::SATURATE,
- ConvertPolicy::WRAP,
- ConvertPolicy::WRAP,
- ConvertPolicy::WRAP,
+ framework::dataset::make("ConvertPolicy",{ ConvertPolicy::SATURATE,
+ ConvertPolicy::SATURATE,
+ ConvertPolicy::WRAP,
+ ConvertPolicy::WRAP,
+ ConvertPolicy::WRAP,
})),
- framework::dataset::make("Expected", { true, true, false, false, false, false})),
+ framework::dataset::make("Expected", { true, false, false, false, false})),
input1_info, input2_info, output_info, policy, expected)
{
ARM_COMPUTE_EXPECT(bool(NEArithmeticSubtraction::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), policy)) == expected, framework::LogLevel::ERRORS);
@@ -194,7 +161,8 @@ TEST_CASE(InvalidBroadcastBoth, framework::DatasetMode::ALL)
TEST_SUITE_END() // InPlaceValidate
TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionU8Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::U8)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
OutOfPlaceDataSet))
{
@@ -210,10 +178,11 @@ using NEArithmeticSubtractionQSYMM16Fixture = ArithmeticSubtracti
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQASYMM8Fixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionQASYMM8Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQASYMM8Fixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::QASYMM8)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
ArithmeticSubtractionQuantizationInfoDataset),
- InPlaceDataSet))
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -222,19 +191,17 @@ TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQASYMM8SignedFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(
- datasets::SmallShapes(),
- ArithmeticSubtractionQASYMM8SIGNEDDataset),
+ datasets::SmallShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
ArithmeticSubtractionQuantizationInfoSignedDataset),
- InPlaceDataSet))
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-
FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticSubtractionQASYMM8SignedBroadcastFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(
datasets::SmallShapesBroadcast(),
- ArithmeticSubtractionQASYMM8SIGNEDDataset),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
ArithmeticSubtractionQuantizationInfoSignedDataset),
OutOfPlaceDataSet))
@@ -242,12 +209,22 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticSubtractionQASYMM8SignedBr
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
+FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEArithmeticSubtractionQASYMM8SignedBroadcastFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(
+ datasets::TinyShapesBroadcastInplace(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ ArithmeticSubtractionQuantizationInfoSignedInPlaceDataset),
+ InPlaceDataSet))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE(QSYMM16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQSYMM16Fixture, framework::DatasetMode::ALL, combine(combine(combine(combine(
datasets::SmallShapes(),
- ArithmeticSubtractionQSYMM16Dataset),
+ framework::dataset::make("DataType", DataType::QSYMM16)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
ArithmeticSubtractionQuantizationInfoSymmetric),
OutOfPlaceDataSet))
@@ -259,7 +236,8 @@ TEST_SUITE_END() // QSYMM16
TEST_SUITE_END() // Quantized
TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS16Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::S16)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
OutOfPlaceDataSet))
{
@@ -267,7 +245,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int16_t>, framew
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS16Dataset),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+ DataType::S16)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
OutOfPlaceDataSet))
{
@@ -277,7 +256,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int16_t>, framew
TEST_SUITE_END() // S16
TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS32Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::S32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
OutOfPlaceDataSet))
{
@@ -285,7 +265,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int32_t>, framew
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS32Dataset),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+ DataType::S32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
OutOfPlaceDataSet))
{
@@ -297,7 +278,8 @@ TEST_SUITE_END() // S32
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(F16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionFP16Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::F16)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
OutOfPlaceDataSet))
{
@@ -308,7 +290,8 @@ TEST_SUITE_END() // F16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE(F32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+ DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
InPlaceDataSet))
{
@@ -316,7 +299,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<float>, framewor
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+ DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
OutOfPlaceDataSet))
{
@@ -328,7 +312,7 @@ template <typename T>
using NEArithmeticSubtractionBroadcastFixture = ArithmeticSubtractionBroadcastValidationFixture<Tensor, Accessor, NEArithmeticSubtraction, T>;
FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticSubtractionBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapesBroadcast(),
- ArithmeticSubtractionFP32Dataset),
+ framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
OutOfPlaceDataSet))
{
@@ -337,7 +321,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticSubtractionBroadcastFixtur
}
FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEArithmeticSubtractionBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapesBroadcast(),
- ArithmeticSubtractionFP32Dataset),
+ framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
OutOfPlaceDataSet))
{
diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp
index a1ae6971f4..50eaf0c667 100644
--- a/tests/validation/NEON/BatchNormalizationLayer.cpp
+++ b/tests/validation/NEON/BatchNormalizationLayer.cpp
@@ -51,7 +51,7 @@ namespace
RelativeTolerance<float> rel_tolerance_f32(0.05f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
constexpr AbsoluteTolerance<float> abs_tolerance_f32(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-constexpr AbsoluteTolerance<float> abs_tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr AbsoluteTolerance<float> abs_tolerance_f16(0.015f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
const auto act_infos = framework::dataset::make("ActivationInfo",
diff --git a/tests/validation/NEON/BatchToSpaceLayer.cpp b/tests/validation/NEON/BatchToSpaceLayer.cpp
index a305dcbcc4..8cf11b7b95 100644
--- a/tests/validation/NEON/BatchToSpaceLayer.cpp
+++ b/tests/validation/NEON/BatchToSpaceLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,55 +49,38 @@ using NEBatchToSpaceLayerFixture = BatchToSpaceLayerValidationFixture<Tensor, Ac
// *INDENT-OFF*
// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blockx > blocky
- TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blocky > blockx
- TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), // Mismatching data types
- TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), // Wrong data type block shape
- TensorInfo(TensorShape(32U, 13U, 2U, 2U, 4U), 1, DataType::F32), // Wrong tensor shape
- }),
- framework::dataset::make("BlockShapeInfo",{ TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
- TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
- TensorInfo(TensorShape(2U, 4U), 1, DataType::S32),
- TensorInfo(TensorShape(4U, 2U), 1, DataType::S32),
- TensorInfo(TensorShape(2U, 2U), 1, DataType::F16),
- TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
- })),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F16),
- TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
- })),
- framework::dataset::make("Expected", { true, true, true, false, false, false})),
- input_info, block_shape_info, output_info, expected)
-{
- bool has_error = bool(NEBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), &block_shape_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false)));
- ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
-}
-DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip(
+DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),
- TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blockx > blocky
- TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blocky > blockx
- TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Mismatching data types
- TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Negative block shapes
- TensorInfo(TensorShape(32U, 16U, 2U, 4U, 4U), 1, DataType::F32), // Wrong tensor shape
+ TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Supported: blockx != blocky && blockx > blocky
+ TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Supported: blockx != blocky && blocky > blockx
+ TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Invalid: Mismatching data types
+ TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Invalid: Negative block shapes
+ TensorInfo(TensorShape(32U, 16U, 2U, 4U, 4U), 1, DataType::F32),// Unsupported tensor rank
+ TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Invalid output tensor shape (invalid batch dimension)
+ TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Invalid output tensor shape (invalid spatial dimension)
+ TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Supported: correct tensor shape with cropping
+ TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Invalid tensor shape with cropping
}),
- framework::dataset::make("BlockShapeX", { 2, 4, 2, 2, 2, 2 })),
- framework::dataset::make("BlockShapeY", { 2, 2, 4, 2, -2, 2 })),
+ framework::dataset::make("BlockShapeX", { 2, 4, 2, 2, 2, 2, 2, 2, 2, 2 })),
+ framework::dataset::make("BlockShapeY", { 2, 2, 4, 2, -2, 2, 2, 2, 2, 2 })),
+ framework::dataset::make("CropInfo", {
+ CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{3, 2, 1, 3}, CropInfo{3, 2, 1, 3}
+ })),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
- TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(64U, 16U, 2U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 32U, 2U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F16),
TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 8U, 2U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(33U, 32U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(27, 12U, 2U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 16U, 2U, 4U), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, true, true, false, false, false})),
- input_info, block_shape_x, block_shape_y, output_info, expected)
+ framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, true, false})),
+ input_info, block_shape_x, block_shape_y, crop_info, output_info, expected)
{
- bool has_error = bool(NEBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false)));
+ bool has_error = bool(NEBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false), crop_info));
ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
}
// clang-format on
@@ -112,6 +95,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchToSpaceLayerFixture<float>, framework::D
// Validate output
validate(Accessor(_target), _reference);
}
+
+FIXTURE_DATA_TEST_CASE(RunSmallWithCropping, NEBatchToSpaceLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(datasets::SmallBatchToSpaceLayerWithCroppingDataset(), framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+
FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchToSpaceLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
DataType::F32)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
@@ -129,6 +122,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchToSpaceLayerFixture<half>, framework::Da
// Validate output
validate(Accessor(_target), _reference);
}
+FIXTURE_DATA_TEST_CASE(RunSmallWithCropping, NEBatchToSpaceLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(datasets::SmallBatchToSpaceLayerWithCroppingDataset(), framework::dataset::make("DataType",
+ DataType::F16)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+
FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchToSpaceLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
DataType::F16)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
diff --git a/tests/validation/NEON/Cast.cpp b/tests/validation/NEON/Cast.cpp
index db73bea9cb..b56594546b 100644
--- a/tests/validation/NEON/Cast.cpp
+++ b/tests/validation/NEON/Cast.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,9 +22,12 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/StringUtils.h"
#include "arm_compute/runtime/NEON/functions/NECast.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuCastKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/ConvertPolicyDataset.h"
@@ -34,7 +37,6 @@
#include "tests/framework/datasets/Datasets.h"
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/CastFixture.h"
-
namespace arm_compute
{
namespace test
@@ -99,6 +101,11 @@ const auto CastF32toS32Dataset = combine(framework::dataset::make("Da
const auto CastF32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8));
const auto CastF32toQASYMM8_SIGNEDDataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED));
+// U64
+const auto CastU64toF32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::F32));
+
+// S64
+const auto CastS64toF32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::F32));
} // namespace
TEST_SUITE(NEON)
@@ -106,6 +113,8 @@ TEST_SUITE(Cast)
template <typename T>
using NECastToU8Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, uint8_t>;
template <typename T>
+using NECastToS8Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, int8_t>;
+template <typename T>
using NECastToU16Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, uint16_t>;
template <typename T>
using NECastToS16Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, int16_t>;
@@ -114,6 +123,10 @@ using NECastToU32Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, ui
template <typename T>
using NECastToS32Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, int32_t>;
template <typename T>
+using NECastToU64Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, uint64_t>;
+template <typename T>
+using NECastToS64Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, int64_t>;
+template <typename T>
using NECastToF16Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, half>;
template <typename T>
using NECastToF32Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, float>;
@@ -187,6 +200,66 @@ CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, NECastToF16Fixture<float>,
CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, NECastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance)
CAST_SUITE(F32_to_U8, DataType::F32, DataType::S32, NECastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance)
+#ifdef __aarch64__
+// S64
+CAST_SUITE(S64_to_F32, DataType::S64, DataType::F32, NECastToF32Fixture<int64_t>, CastS64toF32Dataset, zero_tolerance)
+
+// U64
+CAST_SUITE(U64_to_F32, DataType::U64, DataType::F32, NECastToF32Fixture<uint64_t>, CastU64toF32Dataset, zero_tolerance)
+#endif // __aarch64__
+
+DATA_TEST_CASE(KernelSelectionDstFP16, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType",
+{
+ DataType::F16,
+ DataType::U8,
+ DataType::S32,
+ DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+})),
+cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+ const CpuCastKernel::CastKernel *selected_impl;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.fp16 = true;
+
+ selected_impl = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ data_type, DataType::F16, cpu_isa }, cpu::KernelSelectionType::Preferred);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
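+    // The expected kernel name is built from the CPU extension and the source data type, following the "<ext>_<dt>_cast" naming convention used by the Cast kernel variants.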
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_cast";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
+
+DATA_TEST_CASE(KernelSelectionSrcFP32, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType",
+{
+ DataType::F16,
+})),
+cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ DataType::F32, data_type, cpu_isa }, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = lower_string(cpu_ext) + "_fp32_to_" + cpu_impl_dt(data_type) + "_cast";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
+
TEST_SUITE_END() // Cast
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/Col2Im.cpp b/tests/validation/NEON/Col2Im.cpp
index 9139f0cca8..7eb8cbf0f6 100644
--- a/tests/validation/NEON/Col2Im.cpp
+++ b/tests/validation/NEON/Col2Im.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
-#include "src/core/NEON/kernels/NECol2ImKernel.h"
+#include "src/cpu/kernels/CpuCol2ImKernel.h"
#include "tests/NEON/Helper.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/framework/Asserts.h"
@@ -39,7 +39,7 @@ namespace validation
TEST_SUITE(NEON)
TEST_SUITE(Col2Im)
-using NECol2Im = NESynthetizeFunction<NECol2ImKernel>;
+using CpuCol2Im = NESynthetizeFunction<cpu::kernels::CpuCol2ImKernel>;
// *INDENT-OFF*
// clang-format off
@@ -59,7 +59,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
framework::dataset::make("Expected", { false, false, false, true })),
input_info, output_info, convolved_width, convolved_height, expected)
{
- bool status = bool(NECol2Im::validate(&input_info, &output_info, Size2D(convolved_width, convolved_height)));
+ bool status = bool(CpuCol2Im::validate(&input_info, &output_info, Size2D(convolved_width, convolved_height)));
ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS);
}
// clang-format on
diff --git a/tests/validation/NEON/Convolution3D.cpp b/tests/validation/NEON/Convolution3D.cpp
new file mode 100644
index 0000000000..4185488742
--- /dev/null
+++ b/tests/validation/NEON/Convolution3D.cpp
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEConv3D.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DirectConvolution3DFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */
+const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute tolerance for FP16 types */
+constexpr float tolerance_num = 0.07f; /**< Tolerance number for the FP16 implementation */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance for quantized tests */
+
+/** Activation function Dataset */
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
+});
+
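+// The zipped parameters below are iterated in lockstep (one configuration per entry), while HasBias and the activation infos are combined with every configuration.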
+const auto data_precommit = combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(
+ datasets::SmallDirectConv3DShapes(),
+ framework::dataset::make("StrideX", { 1, 5, 8 })),
+ framework::dataset::make("StrideY", { 1, 2, 3 })),
+ framework::dataset::make("StrideZ", { 1, 2, 1 })),
+ framework::dataset::make("PadX", { 0, 1, 2 })),
+ framework::dataset::make("PadY", { 0, 2, 1 })),
+ framework::dataset::make("PadZ", { 0, 3, 5 })),
+ framework::dataset::make("KernelWidth", { 3, 5, 9 })),
+ framework::dataset::make("KernelHeight", { 2, 1, 3 })),
+ framework::dataset::make("KernelDepth", { 1, 2, 3 })),
+ framework::dataset::make("NumKernels", { 2, 3, 8 })),
+ framework::dataset::make("HasBias", { true, false })),
+ ActivationFunctionsDataset);
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(Convolution3D)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NDHWC), // Mismatching data type input/weights
+ TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NDHWC), // Mismatching input feature maps
+ TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NDHWC), // Invalid weights dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NHWC), // Invalid data layout
+ TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NDHWC), // Invalid biases size
+ TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NDHWC), // Invalid biases dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NDHWC), // Invalid output size
+ TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::U32, DataLayout::NDHWC), // Invalid data type
+ }),
+ framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U), 1U, DataType::F16),
+ TensorInfo(TensorShape(4U, 3U, 3U, 3U, 3U), 1U, DataType::F32),
+ TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U, 3U), 1U, DataType::F32),
+ TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U), 1U, DataType::F32),
+ TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U), 1U, DataType::F32),
+ TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U), 1U, DataType::F32),
+ TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U), 1U, DataType::F32),
+ TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U), 1U, DataType::U32),
+ })),
+ framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1U, DataType::F32),
+ TensorInfo(TensorShape(4U), 1U, DataType::F32),
+ TensorInfo(TensorShape(4U), 1U, DataType::F32),
+ TensorInfo(TensorShape(4U), 1U, DataType::F32),
+ TensorInfo(TensorShape(3U), 1U, DataType::F32),
+ TensorInfo(TensorShape(4U, 2U), 1U, DataType::F32),
+ TensorInfo(TensorShape(4U), 1U, DataType::F32),
+ TensorInfo(TensorShape(4U), 1U, DataType::F32),
+ })),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::F32),
+ TensorInfo(TensorShape(26U, 11U, 4U), 1U, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::U32),
+ })),
+ framework::dataset::make("Expected", { false, false, false, false, false, false, false, false})),
+ input_info, weights_info, biases_info, output_info, expected)
+{
+ const Conv3dInfo conv3d_info(Size3D(1, 1, 1), Padding3D(0, 0, 0), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false);
+ bool is_valid = bool(NEConv3D::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv3d_info));
+ ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using NEDirectConvolution3DFixture = DirectConvolution3DValidationFixture<Tensor, Accessor, NEConv3D, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolution3DFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(data_precommit,
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NDHWC })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END() // FP32
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolution3DFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(data_precommit,
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", { DataLayout::NDHWC })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
+}
+TEST_SUITE_END() // FP16
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+TEST_SUITE_END() // Float
+
+template <typename T>
+using NEDirectConvolution3DQuantizedFixture = DirectConvolution3DValidationQuantizedFixture<Tensor, Accessor, NEConv3D, T>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolution3DQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U),
+ TensorShape(15U, 7U, 11U, 7U),
+ TensorShape(19U, 5U, 16U, 4U),
+ TensorShape(13U, 5U, 17U, 2U)
+ }),
+ framework::dataset::make("StrideX", { 1, 3, 2, 1 })),
+ framework::dataset::make("StrideY", { 2, 1, 3, 1 })),
+ framework::dataset::make("StrideZ", { 3, 2, 1, 1 })),
+ framework::dataset::make("PadX", { 0, 2, 1, 0 })),
+ framework::dataset::make("PadY", { 1, 0, 2, 0 })),
+ framework::dataset::make("PadZ", { 2, 1, 0, 0 })),
+ framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })),
+ framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })),
+ framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })),
+ framework::dataset::make("NumKernels", { 5, 3, 1, 11 })),
+ framework::dataset::make("HasBias", { true, true, true, false })),
+ framework::dataset::make("Activation", ActivationLayerInfo())),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("DataLayout", DataLayout::NDHWC)),
+ framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))),
+ framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))),
+ framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5))))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolution3DQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U),
+ TensorShape(15U, 7U, 11U, 7U),
+ TensorShape(19U, 5U, 16U, 4U),
+ TensorShape(13U, 5U, 17U, 2U)
+ }),
+ framework::dataset::make("StrideX", { 1, 3, 2, 1 })),
+ framework::dataset::make("StrideY", { 2, 1, 3, 1 })),
+ framework::dataset::make("StrideZ", { 3, 2, 1, 1 })),
+ framework::dataset::make("PadX", { 0, 2, 1, 0 })),
+ framework::dataset::make("PadY", { 1, 0, 2, 0 })),
+ framework::dataset::make("PadZ", { 2, 1, 0, 0 })),
+ framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })),
+ framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })),
+ framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })),
+ framework::dataset::make("NumKernels", { 5, 3, 1, 11 })),
+ framework::dataset::make("HasBias", { true, true, true, false })),
+ framework::dataset::make("Activation", ActivationLayerInfo())),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataLayout", DataLayout::NDHWC)),
+ framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))),
+ framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))),
+ framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5))))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
+
+TEST_SUITE_END() // Convolution3D
+TEST_SUITE_END() // Neon
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 9e00da16ae..d739d4e1a4 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,11 +28,16 @@
#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/MemoryHelpers.h"
+#include "src/cpu/operators/CpuGemmConv2d.h"
+#include "src/cpu/operators/CpuGemmDirectConv2d.h"
+#include "src/cpu/operators/CpuWinogradConv2d.h"
+
#include "tests/NEON/Accessor.h"
-#include "tests/PaddingCalculator.h"
#include "tests/datasets/LargeConvolutionLayerDataset.h"
#include "tests/datasets/SmallConvolutionLayerDataset.h"
-#include "tests/datasets/TinyConvolutionLayerDataset.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/framework/datasets/Datasets.h"
@@ -46,6 +51,8 @@ namespace test
{
namespace validation
{
+using framework::dataset::make;
+
namespace detail
{
template <>
@@ -77,10 +84,17 @@ const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2
const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute tolerance for FP16 types */
constexpr float tolerance_num = 0.07f; /**< Tolerance number for the FP16 implementation */
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+
+#ifdef ARM_COMPUTE_ENABLE_SME
+// TODO(COMPMID-6011): SME kernels and the reference model use different rounding modes.
+// Temporarily increase the tolerance for quantized data.
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(1.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+#else // ARM_COMPUTE_ENABLE_SME
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+#endif // ARM_COMPUTE_ENABLE_SME
/** CNN data types */
-const auto CNNDataTypes = framework::dataset::make("DataType",
+const auto CNNDataTypes = make("DataType",
{
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
DataType::F16,
@@ -88,14 +102,41 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
DataType::F32,
DataType::QASYMM8,
});
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+const auto ActivationFunctionsDataset = make("ActivationInfo",
{
ActivationLayerInfo(),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f)
});
-const auto QuantizationData = framework::dataset::make("QuantizationInfo",
+const auto NoActivation = make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+});
+
+const auto ActivationFunctionsDatasetNightly = make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f, -0.5f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SWISH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f),
+#ifdef __aarch64__
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU),
+#endif // __aarch64__
+});
+
+const auto QuantizationData = make("QuantizationInfo",
{
QuantizationInfo(0.5f, 10),
QuantizationInfo(0.3f, 3),
@@ -110,32 +151,32 @@ TEST_SUITE(ConvolutionLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32),
+ make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32),
TensorInfo(TensorShape(23U, 27U, 32U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32),
TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32),
+ make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32),
TensorInfo(TensorShape(5U, 5U, 32U, 21U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
+ make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
TensorInfo(TensorShape(19U, 23U, 21U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
})),
- framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
+ make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(2, 1, 0, 0),
PadStrideInfo(3, 2, 1, 0)
})),
- framework::dataset::make("FastMath", { true,
+ make("FastMath", { true,
true,
false,
false
})),
- framework::dataset::make("Expected", { ConvolutionMethod::WINOGRAD, ConvolutionMethod::WINOGRAD, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })),
+ make("Expected", { ConvolutionMethod::WINOGRAD, ConvolutionMethod::WINOGRAD, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })),
input_info, weights_info, output_info, conv_info, fast_math, expected)
{
ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true),
@@ -147,6 +188,14 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
// *INDENT-ON*
TEST_SUITE_END() // ConvolutionLayer
+/*
+ Testing Strategy of Neon Winograd:
+ - There is no need to thoroughly test NCHW cases because the Winograd kernels accept
+   NHWC; NCHW tensors are permuted before and after the kernel runs.
+ - Apart from ReLU and Bounded ReLU, testing the activations for a single input
+   combination is enough, because the activation is not fused into Winograd and is
+   called separately.
+*/
TEST_SUITE(WinogradLayer)
template <typename T>
using NEWinogradConvolutionLayerFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T>;
@@ -156,38 +205,250 @@ using NEWinogradConvolutionLayerMixedDataLayoutFixture = WinogradConvolutionLaye
template <typename T>
using NEWinogradConvolutionLayerNoBiasFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, false>;
+/** Test case for memory injection in @ref cpu::CpuWinogradConv2d.
+ *
+ * Configure the operator once and inject memory at run-time in multiple executions.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
+{
+ auto winograd = std::make_unique<cpu::CpuWinogradConv2d>();
+ const auto src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32);
+ const auto w_info = TensorInfo(TensorShape(1U), 1, DataType::F32);
+ const auto b_info = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32);
+ auto dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32);
+ const PadStrideInfo pad_info{};
+
+ winograd->configure(&src_info, &b_info, &w_info, &dst_info, pad_info);
+
+ // These tensors are created and allocated once and reused across the runs of the lambda below
+ auto a = create_tensor<Tensor>(src_info);
+ auto b = create_tensor<Tensor>(b_info);
+ auto c = create_tensor<Tensor>(w_info);
+ a.allocator()->allocate();
+ b.allocator()->allocate();
+ c.allocator()->allocate();
+
+ ITensorPack run_pack{ { TensorType::ACL_SRC_0, &a }, { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_SRC_2, &c } };
+ ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_SRC_2, &c } };
+
+ auto mg = MemoryGroup{};
+ auto ws = manage_workspace<Tensor>(winograd->workspace(), mg, run_pack, prep_pack);
+ auto run_conv = [&]() -> Tensor
+ {
+ auto dst = create_tensor<Tensor>(dst_info);
+ dst.allocator()->allocate();
+
+ run_pack.add_tensor(TensorType::ACL_DST, &dst);
+ library->fill_tensor_value(Accessor(a), 1.f);
+ library->fill_tensor_value(Accessor(b), 2.f);
+ library->fill_tensor_value(Accessor(c), 3.f);
+
+ // This operator is configured once and captured by this lambda.
+ winograd->prepare(prep_pack);
+ winograd->run(run_pack);
+ return dst;
+ };
+
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
+}
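+
+// Note on the pattern above: prep_pack carries only the constant tensors (weights and bias) consumed by
+// prepare(), while run_pack additionally carries the input and, per run, the destination tensor; the
+// workspace returned by manage_workspace() holds the intermediate tensors that are injected at run-time
+// instead of being owned by the operator.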
+
+/** Test case for memory injection in @ref NEWinogradConvolutionLayer.
+ *
+ * Make sure @ref NEWinogradConvolutionLayer still works when the memory is injected at configure time using the old API.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
+{
+ auto gemm = std::make_unique<NEWinogradConvolutionLayer>();
+ const auto src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32);
+ const auto w_info = TensorInfo(TensorShape(1U), 1, DataType::F32);
+ const auto b_info = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32);
+ auto dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32);
+ const PadStrideInfo pad_info{};
+
+ auto run_conv = [&]()
+ {
+ auto src = create_tensor<Tensor>(src_info);
+ auto w = create_tensor<Tensor>(w_info);
+ auto b = create_tensor<Tensor>(b_info);
+ auto dst = create_tensor<Tensor>(dst_info);
+
+ gemm->configure(&src, &b, &w, &dst, pad_info);
+
+ src.allocator()->allocate();
+ b.allocator()->allocate();
+ w.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ library->fill_tensor_value(Accessor(src), 1.f);
+ library->fill_tensor_value(Accessor(b), 2.f);
+ library->fill_tensor_value(Accessor(w), 3.f);
+ gemm->run();
+ return dst;
+ };
+
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
+}
+
+DATA_TEST_CASE(SupportedKernels, framework::DatasetMode::ALL, zip(
+ make("WeightsInfo",
+{
+ // Shapes are always in NCHW format. When layout is NHWC, the shape is permuted
+
+ // Fp32, NCHW/NHWC (layout does not matter here, as the shape is permuted to NHWC in the test body when needed)
+ // 3x3, 1x3, 3x1 --> all TRUE
+ TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+ // 5x1, 1x5, 5x5 --> all TRUE
+ TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+ // 7x7, 1x7, 7x1
+ // --> 7x7 FALSE; 1x7 and 7x1 TRUE (see the Expected dataset below)
+ TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+
+ // unsupported kernel sizes
+ TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+ // Fp16
+ TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+
+ // 5x5, 1x5, 5x1 --> all FALSE for Fp16 (see the Expected dataset below)
+ TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+
+ // 7x1, 1x7, 7x7
+ // --> all FALSE
+ TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+
+ // unsupported kernel sizes
+ TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+
+}),
+make("Expected",
+{
+ // fp32
+ true, true, true, // 3x3, 1x3, 3x1
+ true, true, true, // 5x5, 1x5, 5x1
+ false, true, true, // 7x7, 1x7, 7x1
+ false, false, false, // random unsupported kernels
+
+ // fp16
+ true, false, false, // 3x3, 1x3, 3x1
+ false, false, false, // 5x5, 1x5, 5x1
+ false, false, false, // 7x7, 1x7, 7x1
+ false, false, false, // random unsupported kernels
+})),
+weights_info_const, expected_const)
+{
+ DataType data_type = weights_info_const.data_type();
+ DataLayout data_layout = weights_info_const.data_layout();
+
+ TensorInfo input_info = TensorInfo(TensorShape(17U, 31U, 2U), 1, data_type);
+ TensorInfo bias_info = TensorInfo(TensorShape(8U), 1, data_type);
+ TensorInfo weights_info = weights_info_const;
+
+ if(data_layout == DataLayout::NHWC)
+ {
+ // Convert to NHWC
+ PermutationVector perm = PermutationVector(2U, 0U, 1U);
+
+ TensorShape input_shape = input_info.tensor_shape();
+ TensorShape weights_shape = weights_info.tensor_shape();
+ permute(input_shape, perm);
+ permute(weights_shape, perm);
+
+ input_info.set_tensor_shape(input_shape);
+ weights_info.set_tensor_shape(weights_shape);
+
+ input_info.set_data_layout(data_layout);
+ weights_info.set_data_layout(data_layout);
+ bias_info.set_data_layout(data_layout);
+ }
+
+ PadStrideInfo conv_info(1, 1, 0, 0);
+
+ TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, conv_info);
+ TensorInfo output_info = TensorInfo(output_shape, 1, data_type, data_layout);
+
+ Status status = NEWinogradConvolutionLayer::validate(
+ &input_info,
+ &weights_info,
+ &bias_info,
+ &output_info,
+ conv_info,
+ ActivationLayerInfo(),
+ true /* fast math */);
+
+ Status fp16_supported = ::arm_compute::error_on_unsupported_cpu_fp16("N/A", "N/A", 0, &input_info);
+ bool expected = expected_const && static_cast<bool>(fp16_supported);
+
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+
TEST_SUITE(FP32)
TEST_SUITE(Conv1x3)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEWinogradConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(8U, 8U, 32U)),
- framework::dataset::make("Weight", TensorShape(1U, 3U, 32U, 1U))),
- framework::dataset::make("Bias", TensorShape(1U))),
- framework::dataset::make("Output", TensorShape(8U, 6U, 1U))),
- framework::dataset::make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0))),
- framework::dataset::make("Dilation", Size2D(1U, 1U))),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(
+ make("Input", TensorShape(8U, 8U, 32U)),
+ make("Weight", TensorShape(1U, 3U, 32U, 1U)),
+ make("Bias", TensorShape(1U)),
+ make("Output", TensorShape(8U, 6U, 1U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -197,19 +458,19 @@ TEST_SUITE_END() // Conv1x3
TEST_SUITE(Conv3x1)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -219,19 +480,19 @@ TEST_SUITE_END() // Conv3x1
TEST_SUITE(Conv1x5)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -241,19 +502,19 @@ TEST_SUITE_END() // Conv1x5
TEST_SUITE(Conv5x1)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -263,10 +524,10 @@ TEST_SUITE_END() // Conv5x1
TEST_SUITE(Conv7x1)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
@@ -274,9 +535,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, frame
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ make("DataType", { DataType::F32 })),
+ make("ActivationInfo", { ActivationLayerInfo() })),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -285,20 +546,20 @@ TEST_SUITE_END() // Conv7x1
TEST_SUITE(Conv1x7)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -307,20 +568,40 @@ TEST_SUITE_END() // Conv1x7
TEST_SUITE(Conv3x3)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, abs_tolerance_f32);
+}
+/// It's enough to run the activations for a single weight/input combination and data type because
+/// the activation function is applied on top of the Winograd output as a separate operator.
+/// TODO: Enable after COMPMID-6573 is resolved
+FIXTURE_DATA_TEST_CASE(RunActivations, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::DISABLED,
+ combine(
+ make("Input", TensorShape(3U, 3U, 32U)),
+ make("Weight", TensorShape(3U, 3U, 32U, 4U)),
+ make("Bias", TensorShape(4U)),
+ make("Output", TensorShape(1U, 1U, 4U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDatasetNightly,
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
+
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
@@ -331,20 +612,20 @@ TEST_SUITE_END() // Conv3x3
TEST_SUITE(Conv5x5)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
@@ -354,12 +635,12 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, frame
TEST_SUITE_END() // Conv5x5
FIXTURE_DATA_TEST_CASE(RunSmallNoBias, NEWinogradConvolutionLayerNoBiasFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(framework::dataset::concat(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
- datasets::SmallWinogradConvolutionLayer5x5Dataset()),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
-
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(framework::dataset::concat(
+ datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+ datasets::SmallWinogradConvolutionLayer5x5Dataset()),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
@@ -371,12 +652,39 @@ TEST_SUITE_END() // FP32
TEST_SUITE(FP16)
using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, half, float>;
+DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(
+ make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16),
+ TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16)
+ }),
+ make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16)
+ }),
+ make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F16)
+ }),
+ make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0)
+ }),
+ make("FastMath",
+{
+ false, // FP16 with fast_math disabled: Winograd is not selected
+ true // FP16 with fast_math enabled: Winograd is selected
+}),
+make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::WINOGRAD })),
+input_info, weights_info, output_info, conv_info, fast_math, expected)
+{
+ ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true),
+ &weights_info.clone()->set_is_resizable(true),
+ &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
+ ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+
TEST_SUITE(Conv3x3)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -384,10 +692,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
@@ -398,18 +706,470 @@ TEST_SUITE_END() // FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE_END() // WinogradLayer
+#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
+TEST_SUITE(FIXED_FORMAT_KERNELS)
+TEST_SUITE(VariableWeightUtils)
+
+// UC2_1_* tests: the user requests a specific fixed format, but there is no kernel that supports it.
+
+template <typename ConvolutionClass>
+using HasOptImplFixtureNoFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ false>;
+
+template <typename ConvolutionClass>
+using HasOptImplFixtureFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ true>;
+
+// UC2_1
+
+FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
+{
+ ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
+}
+FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
+{
+ ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
+}
+
+FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
+{
+ ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
+}
+
+FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
+{
+ ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
+}
+
+// UC2_2_* tests: the user requests a specific fixed format, and a
+// kernel that supports that fixed format is found.
+
+FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo4 })))
+{
+ ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS);
+}
+
+FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo4 })))
+{
+ ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS);
+}
+
+#if defined(ARM_COMPUTE_ENABLE_BF16)
+// These tests currently only work with an SVE vector length of 256 bits.
+// If another SVE length is used, no kernel will be found.
+// This needs to be addressed to ensure we don't fall back to FP32 kernels on systems with an SVE length other than 256 bits.
+FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 })))
+{
+ if(Scheduler::get().cpu_info().has_bf16() && (arm_gemm::utils::get_vector_length<float>() == 8)){
+ ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT_EQUAL(_computed_weight_format, arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS);
+ }
+ else{
+ ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
+ }
+}
+
+FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 })))
+{
+ if(Scheduler::get().cpu_info().has_bf16() && (arm_gemm::utils::get_vector_length<float>() == 8)){
+ ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS);
+ }
+ else{
+ ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
+ }
+}
+
+#endif // ARM_COMPUTE_ENABLE_BF16
+
+// UC3_1_* tests: the user queries for ANY fixed format, but there is
+// no kernel that supports the use case specified by the user (for
+// example, there is no fixed-format kernel for the data type of the
+// problem).
+
+FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::S32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
+{
+ ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
+}
+
+FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::S32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
+{
+ ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
+}
+
+FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::S32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
+{
+ ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
+}
+
+FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::S32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
+{
+ ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
+}
+
+// UC3_2_* tests: the user queries for ANY fixed format. The search
+// succeeds and the fixed format that was found is reported back to the
+// user. Note that we only check that _computed_weight_format is not one
+// of the non-fixed formats (ANY and UNSPECIFIED), because the weight
+// format produced by the runtime depends on the width of the vector
+// units of the hardware where the test is executed. For example, a
+// format like OHWIo4 returned for FP32 data on 128-bit NEON hardware
+// is replaced by OHWIo8 when running on 256-bit SVE.
+
+FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
+{
+ ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
+}
+
+FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
+{
+ ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
+}
+
+#if defined(ARM_COMPUTE_ENABLE_BF16)
+
+FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
+{
+ if(Scheduler::get().cpu_info().has_bf16()){
+ ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS);
+ }
+ else{
+ ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(!arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS);
+ }
+}
+
+FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
+{
+ if(Scheduler::get().cpu_info().has_bf16()){
+ ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS);
+ }
+ else{
+ ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(!arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS);
+ }
+}
+
+#endif // ARM_COMPUTE_ENABLE_BF16
+
+namespace
+{
+using TestCaseType = std::tuple<TensorShape, TensorShape, arm_compute::WeightFormat>;
+auto prepare_weights_shapes = framework::dataset::make("TensorShape",
+{
+ // OHWIo<interleave_by>i<block_by>
+ //
+ // OHWI --> O'HWI', where:
+ //
+ // O'= smallest multiple of <interleave_by> such that O<=O'
+ // I'= smallest multiple of <block_by> such that I<=I'
+ //
+
+ // Change N for OHWIo4
+ TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 12U }, arm_compute::WeightFormat::OHWIo4 }),
+ // Change N for OHWIo8
+ TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
+ TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
+ TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
+ TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
+ TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
+ TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
+ TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
+ TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
+ TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 16U }, arm_compute::WeightFormat::OHWIo8 }),
+ // Change N for OHWIo4 when H, W and C are not 1
+ TestCaseType({ { 3U, 4U, 2U, 1U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 3U, 4U, 2U, 2U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 3U, 4U, 2U, 3U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 3U, 4U, 2U, 4U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 3U, 4U, 2U, 6U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 3U, 4U, 2U, 7U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 3U, 4U, 2U, 8U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 12 }, arm_compute::WeightFormat::OHWIo4 }),
+
+ // Fix N and move HWI around, with different data layouts and formats
+ TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 2U, 4U, 3U, 9U }, { 2, 4, 3, 16 }, arm_compute::WeightFormat::OHWIo8 }),
+ TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 16 }, arm_compute::WeightFormat::OHWIo8 }),
+ TestCaseType({ { 1024U, 1U, 1U, 1001U }, { 1024, 1, 1, 1008 }, arm_compute::WeightFormat::OHWIo8 }),
+
+ // Adding <block_by> on I (=C)
+ TestCaseType({ { 1U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
+ TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
+ TestCaseType({ { 3U, 4U, 3U, 5U }, { 4, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
+
+ // ---------
+ TestCaseType({ { 2, 2, 1, 5 }, { 2, 2, 1, 8 }, arm_compute::WeightFormat::OHWIo4 }),
+ TestCaseType({ { 1, 2, 2, 5 }, { 1, 2, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
+
+});
+} // unnamed namespace
+
+DATA_TEST_CASE(PrepareWeightShape, framework::DatasetMode::ALL,
+ prepare_weights_shapes, shapes)
+{
+ const TensorShape input_shape = std::get<0>(shapes);
+ const TensorShape expected_shape = std::get<1>(shapes);
+ const arm_compute::WeightFormat wf = std::get<2>(shapes);
+ const DataType DT = DataType::F32;
+ const DataLayout DL = DataLayout::NHWC;
+ const auto TI = TensorInfo(input_shape, 1 /*num_channels, deprecated*/, DT, DL);
+ const TensorInfo computed_info = ::arm_compute::test::validation::prepare_weights(TI, wf);
+ ARM_COMPUTE_EXPECT_EQUAL(computed_info.tensor_shape(), expected_shape, framework::LogLevel::ERRORS);
+}
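+
+// Illustration only (not used by the tests): the rounding rule described in the OHWIo<interleave_by>i<block_by>
+// comment above amounts to rounding each of O and I up to the next multiple of its interleave/block factor.
+// The helper name below is hypothetical and exists purely for this worked example.
+namespace
+{
+constexpr unsigned int round_up_to_multiple(unsigned int value, unsigned int factor)
+{
+    return ((value + factor - 1) / factor) * factor;
+}
+// For OHWIo4i2: O = 5 rounds up to O' = 8 and I = 1 rounds up to I' = 2, which matches the
+// TestCaseType({ { 1U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }) entry above.
+static_assert(round_up_to_multiple(5, 4) == 8, "O' for interleave_by = 4");
+static_assert(round_up_to_multiple(1, 2) == 2, "I' for block_by = 2");
+} // namespace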
+
+TEST_SUITE_END() // VariableWeightUtils
+
+TEST_SUITE(ExperimentalCpuAPIVariableWeightWithFixtures)
+
+template <typename ScalarType>
+using VarWidth = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>;
+
+FIXTURE_DATA_TEST_CASE(RunSmallFloat, VarWidth<float>, framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallConvolutionLayerDataset(),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("ACL Scalar type", { DataType::F32 })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
+}
+
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+FIXTURE_DATA_TEST_CASE(RunSmallHalf, VarWidth<half>, framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallConvolutionLayerDataset(),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("ACL Scalar type", { DataType::F16 })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16));
+}
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#if defined(ARM_COMPUTE_ENABLE_BF16)
+template <typename ScalarType>
+using VarWidthFastMath = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>;
+
+FIXTURE_DATA_TEST_CASE(RunSmallFloatFastMath, VarWidthFastMath<float>, framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallConvolutionLayerDataset(),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("ACL Scalar type", { DataType::F32 })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
+}
+#endif // ARM_COMPUTE_ENABLE_BF16
+
+TEST_SUITE_END() // ExperimentalCpuAPIVariableWeightWithFixtures
+
+TEST_SUITE(ExperimentalNEAPIVariableWeightWithFixtures)
+
+template <typename ScalarType>
+using NEGEMMVarWidth = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>;
+
+FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloat, NEGEMMVarWidth<float>, framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallConvolutionLayerDataset(),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("ACL Scalar type", { DataType::F32 })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
+}
+
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallHalf, NEGEMMVarWidth<half>, framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallConvolutionLayerDataset(),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("ACL Scalar type", { DataType::F16 })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16));
+}
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#if defined(ARM_COMPUTE_ENABLE_BF16)
+template <typename ScalarType>
+using NEGEMMVarWidthFastMath = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>;
+
+FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloatFastMath, NEGEMMVarWidthFastMath<float>, framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallConvolutionLayerDataset(),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("ACL Scalar type", { DataType::F32 })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
+}
+#endif // ARM_COMPUTE_ENABLE_BF16
+
+TEST_SUITE_END() // ExperimentalNEAPIVariableWeightWithFixtures
+TEST_SUITE_END() // FIXED_FORMAT_KERNELS
+
+#endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
+
TEST_SUITE(GEMMConvolutionLayer)
template <typename T>
using NEGEMMConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>;
template <typename T>
+using NEGEMMConvolutionLayerPaddedWeightsFixture = ConvolutionValidationPaddedWeightsFixture<Tensor, Accessor, NEConvolutionLayer, T>;
+template <typename T>
using NEGEMMConvolutionLayerMixedDataLayoutFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T, true>;
+/** Test case for memory injection in @ref cpu::CpuGemmConv2d.
+ *
+ * Configure the operator once and inject memory at run-time in multiple executions.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
+{
+ auto conv = std::make_unique<cpu::CpuGemmConv2d>();
+ const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW);
+ const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
+ const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
+ auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
+ const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
+ WeightsInfo weights_info(false, 3U, 3U, 1U);
+ conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info, weights_info);
+
+ // These tensors are created and allocated once and reused across the runs of the lambda below
+ auto src = create_tensor<Tensor>(src_info);
+ auto weight = create_tensor<Tensor>(weight_info);
+ auto bias = create_tensor<Tensor>(bias_info);
+ src.allocator()->allocate();
+ weight.allocator()->allocate();
+ bias.allocator()->allocate();
+
+ ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
+ ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
+
+ auto mg = MemoryGroup{};
+ auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
+
+ auto run_conv = [&]() -> Tensor
+ {
+ auto dst = create_tensor<Tensor>(dst_info);
+ dst.allocator()->allocate();
+ run_pack.add_tensor(TensorType::ACL_DST, &dst);
+
+ library->fill_tensor_value(Accessor(src), 1.f);
+ library->fill_tensor_value(Accessor(weight), 2.f);
+ library->fill_tensor_value(Accessor(bias), 3.f);
+ // This operator is configured once and captured by this lambda.
+ conv->prepare(prep_pack);
+ conv->run(run_pack);
+ return dst;
+ };
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
+}
+
+/** Test case for memory injection in @ref NEGEMMConvolutionLayer.
+ *
+ * Make sure @ref NEGEMMConvolutionLayer still works when the memory is injected at configure time using the old API.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
+{
+ auto conv = std::make_unique<NEGEMMConvolutionLayer>();
+ const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW);
+ const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
+ const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
+ auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
+ const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
+ WeightsInfo weights_info(false, 3U, 3U, 1U);
+ auto run_conv = [&]()
+ {
+ auto src = create_tensor<Tensor>(src_info);
+ auto weight = create_tensor<Tensor>(weight_info);
+ auto bias = create_tensor<Tensor>(bias_info);
+ auto dst = create_tensor<Tensor>(dst_info);
+ conv->configure(&src, &weight, &bias, &dst, conv_info, weights_info);
+ src.allocator()->allocate();
+ weight.allocator()->allocate();
+ bias.allocator()->allocate();
+ dst.allocator()->allocate();
+ library->fill_tensor_value(Accessor(src), 1.f);
+ library->fill_tensor_value(Accessor(weight), 2.f);
+ library->fill_tensor_value(Accessor(bias), 3.f);
+ conv->run();
+ return dst;
+ };
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
+}
+
TEST_SUITE(Float)
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+#if defined(ARM_COMPUTE_ENABLE_BF16)
TEST_SUITE(BFLOAT16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", DataType::BFLOAT16)),
+ framework::dataset::make("DataType", Scheduler::get().cpu_info().has_bf16() ? DataType::BFLOAT16 : DataType::F32)),
framework::dataset::make("DataLayout", { DataLayout::NHWC })),
ActivationFunctionsDataset))
{
@@ -417,7 +1177,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework
validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}
TEST_SUITE_END() // BFLOAT16
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
@@ -459,9 +1219,39 @@ FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerMixedDataLayout
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}
+/** Padded weights
+ * CpuGemmConv2d uses two different paths for reshaping the weights, depending on whether the weight tensor has holes
+ * (a common way to get "holes" in a tensor is via extended paddings; see the short illustration after this test case).
+ *
+ * We only need to test the padded-weight path here on a single floating-point data type and a single layout, because the fallback path is agnostic of both.
+ */
+FIXTURE_DATA_TEST_CASE(RunPaddedWeights, NEGEMMConvolutionLayerPaddedWeightsFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallConvolutionLayerDataset(),
+ framework::dataset::make("ReshapeWeights", { true }),
+ framework::dataset::make("DataType", DataType::F32),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })
+ ))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
+}
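+
+// A minimal sketch of what "holes" means here (illustration only, with made-up numbers, independent of the
+// library API): a weight row holding 3 useful elements but laid out with a stride of 5 elements, e.g. because
+// its padding was extended after allocation, leaves 2 unused slots at the end of every row. The weight data is
+// then no longer contiguous in memory, which is what sends CpuGemmConv2d down the padded-weight reshaping path
+// exercised by RunPaddedWeights above; fully packed weights take the other (fallback) path.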
+
+// This very large shape test is required to exercise the heuristic paths where the tensor size is > 1e7 bytes
+// and the weight dimensions are larger than 7.
+FIXTURE_DATA_TEST_CASE(RunVeryLarge, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(datasets::VeryLargeConvolutionLayerDataset(),
+ framework::dataset::make("ReshapeWeights", { true }),
+ framework::dataset::make("DataType", DataType::F32),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ NoActivation))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
+}
+
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
+// TODO: COMPMID-6596 Extend quantized tests with at least one suite where the weight is padded (the legacy case, see floating point's RunPaddedWeights)
template <typename T>
using NEGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T>;
template <typename T>
@@ -477,12 +1267,17 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
});
TEST_SUITE(Quantized)
+/// @note: Every asymmetric quantized test without a fused activation has its explicitly specified quantization info ignored.
+/// This is because, instead of using the same quantization information for all the tensors, the fixture generates
+/// separate quantization info for each input and for the output tensor.
+/// Once dynamic quantization is also supported in the presence of an activation, these two versions should be merged
+/// again and the explicitly specified quantization info removed.
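+/// A worked example of the quantization info used below (illustration only): QuantizationInfo(scale, offset)
+/// maps a real value x to the code q = round(x / scale) + offset, clamped to the QASYMM8 range [0, 255].
+/// With QuantizationInfo(2.f / 255.f, 10), x = 0.5f gives q = round(0.5 * 255 / 2) + 10 = 64 + 10 = 74.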
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
framework::dataset::make("ReshapeWeights", { true })),
framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("QuantizationInfoIfActivationEnabled", { QuantizationInfo(2.f / 255.f, 10) })),
QuantizedActivationFunctionsDataset))
{
// Validate output
@@ -499,7 +1294,7 @@ FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixtur
framework::dataset::make("ReshapeWeights", { true })),
framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("QuantizationInfoIfActivationEnabled", { QuantizationInfo(2.f / 255.f, 10) })),
QuantizedActivationFunctionsDataset))
{
// Validate output
@@ -512,7 +1307,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<int8_t>,
framework::dataset::make("ReshapeWeights", { true })),
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })),
+ framework::dataset::make("QuantizationInfoIfActivationEnabled", { QuantizationInfo(0.01f, -10) })),
QuantizedActivationFunctionsDataset))
{
// Validate output
@@ -529,7 +1324,7 @@ FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixtur
framework::dataset::make("ReshapeWeights", { true })),
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("QuantizationInfoIfActivationEnabled", { QuantizationInfo(2.f / 255.f, 10) })),
QuantizedActivationFunctionsDataset))
{
// Validate output
@@ -562,6 +1357,27 @@ FIXTURE_DATA_TEST_CASE(RunSmallSigned, NEGEMMConvolutionLayerQuantizedPerChannel
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
+
+FIXTURE_DATA_TEST_CASE(MemoryStressLargeChannels, NEGEMMConvolutionLayerQuantizedPerChannelFixture<int8_t>,
+ framework::DatasetMode::ALL,
+ combine(
+ make("In", TensorShape(1U)),
+ make("Weights", TensorShape(1U, 1U, 1U, 17000U)),
+ make("Biases", TensorShape(17000U)),
+ make("Out", TensorShape(1U, 1U, 17000U)),
+ make("Info", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1, 1)),
+ make("ReshapeWeights", { true }),
+ make("DataType", { DataType::QASYMM8_SIGNED }),
+ make("DataLayout", { DataLayout::NHWC }),
+ make("QuantizationInfo", QuantizationInfo(0.5f, 10)),
+ make("ActivationInfo", ActivationLayerInfo()),
+ make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+
TEST_SUITE_END() // QSYMM8_PER_CHANNEL
TEST_SUITE_END() // Quantized
@@ -571,6 +1387,99 @@ TEST_SUITE(DirectGEMMConv2d)
template <typename T>
using NEDirectGEMMConv2dLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEGEMMConv2d, T>;
+/** Test case for memory injection in @ref cpu::CpuGemmDirectConv2d.
+ *
+ * Configure the operator once and inject memory at run-time in multiple executions.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
+{
+ auto conv = std::make_unique<cpu::CpuGemmDirectConv2d>();
+ const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC);
+ const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC);
+ const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC);
+ auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC);
+ const auto conv_info = Conv2dInfo{};
+ conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info);
+
+    // src, weights and bias are created and allocated once here; only the destination
+    // tensor is re-created on every invocation of the run_conv lambda below
+ auto src = create_tensor<Tensor>(src_info);
+ auto weight = create_tensor<Tensor>(weight_info);
+ auto bias = create_tensor<Tensor>(bias_info);
+ src.allocator()->allocate();
+ weight.allocator()->allocate();
+ bias.allocator()->allocate();
+
+ ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
+ ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
+
+ auto mg = MemoryGroup{};
+ auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
+
+ auto run_conv = [&]() -> Tensor
+ {
+ auto dst = create_tensor<Tensor>(dst_info);
+ dst.allocator()->allocate();
+ run_pack.add_tensor(TensorType::ACL_DST, &dst);
+
+ library->fill_tensor_value(Accessor(src), 1.f);
+ library->fill_tensor_value(Accessor(weight), 2.f);
+ library->fill_tensor_value(Accessor(bias), 3.f);
+ // This operator is configured once and captured by this lambda.
+ conv->prepare(prep_pack);
+ conv->run(run_pack);
+ return dst;
+ };
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
+}
+
+/** Test case for memory injection in @ref NEGEMMConv2d.
+ *
+ * Make sure @ref NEGEMMConv2d still works when the memory is injected at configure time using the old API.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
+{
+ auto conv = std::make_unique<NEGEMMConv2d>();
+ const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC);
+ const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC);
+ const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC);
+ auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC);
+ const auto conv_info = Conv2dInfo{};
+ auto run_conv = [&]()
+ {
+ auto src = create_tensor<Tensor>(src_info);
+ auto weight = create_tensor<Tensor>(weight_info);
+ auto bias = create_tensor<Tensor>(bias_info);
+ auto dst = create_tensor<Tensor>(dst_info);
+ conv->configure(&src, &weight, &bias, &dst, conv_info);
+ src.allocator()->allocate();
+ weight.allocator()->allocate();
+ bias.allocator()->allocate();
+ dst.allocator()->allocate();
+ library->fill_tensor_value(Accessor(src), 1.f);
+ library->fill_tensor_value(Accessor(weight), 2.f);
+ library->fill_tensor_value(Accessor(bias), 3.f);
+ conv->run();
+ return dst;
+ };
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
+}
+
TEST_SUITE(Float)
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 19bd742a61..b4c049f6f9 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,55 +47,86 @@ constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for
constexpr AbsoluteTolerance<float> tolerance_quantized(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
const RelativeTolerance<half_float::half> tolerance_fp16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr float tolerance_num_fp16 = 0.02f; /**< Tolerance number for FP16 tests -- follows a slightly stricter approach compared to ConvolutionLayer tests */
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
-constexpr float tolerance_num = 0.07f; /**< Tolerance number */
+constexpr float tolerance_num_quant = 0.07f; /**< Tolerance number for quantized types */
const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
- * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 });
+ * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels",
+{
+ 3
+});
const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2)
- * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 });
+ * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels",
+{
+ 3
+});
const auto data3x3_asymm = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadLeft", 0, 1)
- * framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", { 3 });
+ * framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels",
+{
+ 3
+});
-const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape{ 10U, 10U, 1U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
- 2)
- *framework::dataset::make("PadLeft", 3)
- *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
+const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape
+{
+ 10U, 10U, 1U, 1U
+})
+*framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", 2) *framework::dataset::make("PadLeft", 3) *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop",
+ 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
-const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
- 2)
- *framework::dataset::make("PadLeft", 3)
- *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
+const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape
+{
+ 640U, 360U, 56U, 1U
+})
+*framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", 2) *framework::dataset::make("PadLeft", 3) *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop",
+ 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2)
- * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 });
+ * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels",
+{
+ 3
+});
const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1)
- * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 });
+ * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels",
+{
+ 3
+});
-const auto data_layouts_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC });
+const auto data5x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1)
+ * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels",
+{
+ 3
+});
+
+const auto data_layouts_dataset = framework::dataset::make("DataLayout",
+{
+ DataLayout::NCHW, DataLayout::NHWC
+});
-const auto add_bias_dataset = framework::dataset::make("AddBias", { true, false });
+const auto add_bias_dataset = framework::dataset::make("AddBias",
+{
+ true, false
+});
const auto input_qinfo_dataset = framework::dataset::make("InputQInfo",
{
QuantizationInfo(1.f / 255.f, 0),
- QuantizationInfo(2.f, 0),
+ QuantizationInfo(2.f, 0),
});
const auto output_qinfo_dataset = framework::dataset::make("OutputQInfo",
{
QuantizationInfo(3.f / 255.f, 0),
- QuantizationInfo(4.f, 0),
+ QuantizationInfo(4.f, 0),
});
} // namespace
TEST_SUITE(NEON)
TEST_SUITE(DeconvolutionLayer)
-
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
@@ -105,6 +136,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid bias shape
TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32), // Window shrink
TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(2U, 2U, 1U, 1U), 1, DataType::F32),   // Small shape, no padding
+                                            TensorInfo(TensorShape(3U, 26U, 26U, 1U), 1, DataType::F32), // Negative padding
}),
framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16),
TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
@@ -112,6 +145,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(1U, 1U, 26U, 88U), 1, DataType::F32),
})),
framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(1U), 1, DataType::F16),
TensorInfo(TensorShape(1U), 1, DataType::F32),
@@ -119,6 +154,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
TensorInfo(TensorShape(25U, 11U), 1, DataType::F32),
TensorInfo(TensorShape(1U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(1U), 1, DataType::F32),
+ TensorInfo(TensorShape(88U), 1, DataType::F32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16),
TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32),
@@ -126,6 +163,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32),
+                                           TensorInfo(TensorShape(4U, 4U, 1U, 1U), 1, DataType::F32),
+                                           TensorInfo(TensorShape(1U, 78U, 88U, 1U), 1, DataType::F32),
})),
framework::dataset::make("PadStrideInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
@@ -133,8 +172,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 1, 1),
PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(2, 3, 3, 1),
})),
- framework::dataset::make("Expected", { false, false, false, false, false, true })),
+                                           framework::dataset::make("Expected", { false, false, false, false, false, true, true, false })),
input_info, weights_info, bias_info, output_info, pad_info, expected)
{
bool is_valid = bool(NEDeconvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pad_info));
@@ -158,6 +199,9 @@ using NEDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture<
template <typename T>
using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>;
+template <typename T>
+using NEDeconvolutionLayerFixture5x1 = DeconvolutionValidationFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 5, 1>;
+
TEST_SUITE(Float)
TEST_SUITE(FP32)
TEST_SUITE(W4x4)
@@ -221,6 +265,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerAsymmFixture9x9<float>, fra
validate(Accessor(_target), _reference, tolerance_fp32);
}
TEST_SUITE_END() // W9x9
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture5x1<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F32)),
+ data_layouts_dataset),
+ add_bias_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END() // W5x1
TEST_SUITE_END() // FP32
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
@@ -231,7 +284,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4<half>, framework::Dat
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_fp16);
+ validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16);
}
TEST_SUITE_END() // W4x4
TEST_SUITE(W3x3)
@@ -241,14 +294,14 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerFixture3x3<half>, framework
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_fp16);
+ validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::F16)),
data_layouts_dataset),
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_fp16);
+ validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16);
}
TEST_SUITE_END() // W3x3
TEST_SUITE(W1x1)
@@ -257,9 +310,18 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1<half>, framework::Dat
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_fp16);
+ validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16);
}
TEST_SUITE_END() // W1x1
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture5x1<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F16)),
+ data_layouts_dataset),
+ add_bias_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16);
+}
+TEST_SUITE_END() // W5x1
TEST_SUITE_END() // FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
@@ -275,6 +337,9 @@ template <typename T>
using NEDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>;
template <typename T>
+using NEDeconvolutionLayerQuantizedFixture5x1 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 5, 1>;
+
+template <typename T>
using NEDeconvolutionLayerQuantizedPerChannelFixture4x4 = DeconvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEDeconvolutionLayer, T, int8_t, 4, 4>;
template <typename T>
@@ -283,6 +348,9 @@ using NEDeconvolutionLayerQuantizedPerChannelFixture3x3 = DeconvolutionValidatio
template <typename T>
using NEDeconvolutionLayerQuantizedPerChannelFixture1x1 = DeconvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEDeconvolutionLayer, T, int8_t, 1, 1>;
+template <typename T>
+using NEDeconvolutionLayerQuantizedPerChannelFixture5x1 = DeconvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEDeconvolutionLayer, T, int8_t, 5, 1>;
+
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
@@ -295,7 +363,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture4x4<uint8_t>, fr
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
TEST_SUITE_END() // W4x4
@@ -309,7 +377,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3,
framework::dataset::make("DataType",
@@ -320,7 +388,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
TEST_SUITE_END() // W3x3
@@ -333,10 +401,23 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1<uint8_t>, fr
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
TEST_SUITE_END() // W1x1
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture5x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType",
+ DataType::QASYMM8)),
+ data_layouts_dataset),
+ input_qinfo_dataset),
+ output_qinfo_dataset),
+ add_bias_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
+}
+TEST_SUITE_END() // W5x1
+
TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
@@ -350,7 +431,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture4x4<int8_t>, fra
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
TEST_SUITE_END() // W4x4
@@ -364,7 +445,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerQuantizedFixture3x3<int8_t>
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3,
framework::dataset::make("DataType",
@@ -375,7 +456,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<int8_t>
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
TEST_SUITE_END() // W3x3
@@ -389,16 +470,41 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1<int8_t>, fra
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
TEST_SUITE_END() // W1x1
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture5x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED)),
+ data_layouts_dataset),
+ input_qinfo_dataset),
+ output_qinfo_dataset),
+ add_bias_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
+}
+TEST_SUITE_END() // W5x1
+
TEST_SUITE_END() // QASYMM8_SIGNED
-const auto input_qinfo_per_channel_dataset = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10) });
-const auto output_qinfo_per_channel_dataset = framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0) });
-const auto input_signed_qinfo_per_channel_dataset = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, -10) });
-const auto output_signed_qinfo_per_channel_dataset = framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 10) });
+const auto input_qinfo_per_channel_dataset = framework::dataset::make("InputQuantizationInfo",
+{
+ QuantizationInfo(1.f / 255.f, 10)
+});
+const auto output_qinfo_per_channel_dataset = framework::dataset::make("OutputQuantizationInfo",
+{
+ QuantizationInfo(3.f / 255.f, 0)
+});
+const auto input_signed_qinfo_per_channel_dataset = framework::dataset::make("InputQuantizationInfo",
+{
+ QuantizationInfo(1.f / 255.f, -10)
+});
+const auto output_signed_qinfo_per_channel_dataset = framework::dataset::make("OutputQuantizationInfo",
+{
+ QuantizationInfo(3.f / 255.f, 10)
+});
TEST_SUITE(QSYMM8_PER_CHANNEL)
@@ -412,7 +518,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedPerChannelFixture4x4<ui
framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture4x4<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data4x4,
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
@@ -423,7 +529,7 @@ FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture
framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
TEST_SUITE_END() // W4x4
@@ -437,7 +543,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedPerChannelFixture3x3<ui
framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture3x3<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data3x3,
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
@@ -448,7 +554,7 @@ FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture
framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
TEST_SUITE_END() // W3x3
@@ -462,7 +568,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedPerChannelFixture1x1<ui
framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture1x1<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data1x1,
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
@@ -473,10 +579,35 @@ FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture
framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
}
TEST_SUITE_END() // W1x1
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedPerChannelFixture5x1<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data5x1,
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ data_layouts_dataset),
+ input_qinfo_per_channel_dataset),
+ output_qinfo_per_channel_dataset),
+ add_bias_dataset),
+ framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture5x1<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data5x1,
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ data_layouts_dataset),
+ input_signed_qinfo_per_channel_dataset),
+ output_signed_qinfo_per_channel_dataset),
+ add_bias_dataset),
+ framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant);
+}
+TEST_SUITE_END() // W5x1
+
TEST_SUITE_END() // QSYMM8_PER_CHANNEL
TEST_SUITE_END() // Quantized
diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp
index 5649e5a556..4972708144 100644
--- a/tests/validation/NEON/DepthConvertLayer.cpp
+++ b/tests/validation/NEON/DepthConvertLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -56,14 +56,12 @@ const auto DepthConvertLayerU16toU8Dataset = combine(framework::dataset::ma
const auto DepthConvertLayerU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
const auto DepthConvertLayerS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
const auto DepthConvertLayerS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
-const auto DepthConvertLayerBF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::BFLOAT16), framework::dataset::make("DataType", DataType::F32));
const auto DepthConvertLayerF16toU8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U8));
const auto DepthConvertLayerF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32));
const auto DepthConvertLayerF16toS32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S32));
const auto DepthConvertLayerF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
const auto DepthConvertLayerF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32));
const auto DepthConvertLayerF32toU8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U8));
-const auto DepthConvertLayerF32toBF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::BFLOAT16));
const auto DepthConvertLayerS32toF32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F32));
const auto DepthConvertLayerS32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::QASYMM8));
@@ -127,8 +125,6 @@ using NEDepthConvertLayerToU8Fixture = DepthConvertLayerValidationFixture<Tensor
template <typename T>
using NEDepthConvertLayerToU32Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, uint32_t>;
template <typename T>
-using NEDepthConvertLayerToBF16Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, bfloat16>;
-template <typename T>
using NEDepthConvertLayerToF16Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, half>;
template <typename T>
using NEDepthConvertLayerToF32Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, float>;
@@ -342,28 +338,6 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture<int16_t>, frame
}
TEST_SUITE_END() // S16_to_S32
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
-TEST_SUITE(BFLOAT16_to_F32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF32Fixture<bfloat16>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerBF16toF32Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- DepthConvertLayerZeroShiftDataset))
-{
- // Validate output
- validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END() // BFLOAT16_to_F32
-
-TEST_SUITE(F32_to_BFLOAT16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToBF16Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF32toBF16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- DepthConvertLayerZeroShiftDataset))
-{
- // Validate output
- validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END() // F32_to_BFLOAT16
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
-
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(F16_to_QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToQASYMM8Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
index 7260eec42d..e9609b7b72 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,32 +42,70 @@ namespace test
{
namespace validation
{
+using framework::dataset::make;
using namespace arm_compute::misc::shape_calculator;
namespace
{
-constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
+constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8_SIGNED */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.02)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
constexpr float tolerance_num = 0.05f; /**< Tolerance number */
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 8 });
-const auto large_depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 5, 32 });
+const auto depth_multipliers = make("DepthMultiplier", { 1, 2, 8 });
+const auto large_depth_multipliers = make("DepthMultiplier", { 5, 32 });
-//Activation Functions
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+// Activation Functions
+const auto NoActivation = make("ActivationInfo", ActivationLayerInfo());
+
+const auto ActivationFunctionsDataset = make("ActivationInfo",
{
ActivationLayerInfo(),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
});
-const auto input_qinfo_dataset = framework::dataset::make("InputQInfo",
+const auto ActivationFunctionsDatasetNightly = make("ActivationInfo",
+{
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f, -0.5f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SWISH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f),
+#ifdef __aarch64__
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU),
+#endif // __aarch64__
+});
+
+const auto ActivationFunctionsQuantizedSmallDataset = make("ActivationInfo",
+{
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+});
+
+const auto ActivationFunctionsQuantizedDataset = make("ActivationInfo",
+{
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f, -0.5f),
+});
+
+// This is only used when there is a fused activation
+const auto input_qinfo_dataset = make("InputQInfo",
{
QuantizationInfo(0.3f, 10),
QuantizationInfo(2.2f, 10),
});
+
+const auto IgnoredQuantizationInfo = make("IgnoredQuantizationInfo", QuantizationInfo());
+
} // namespace
TEST_SUITE(NEON)
@@ -76,7 +114,7 @@ TEST_SUITE(DepthwiseConvolutionLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Mismatching data type input/weights
+ make("InputInfo", { TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Mismatching data type input/weights
TensorInfo(TensorShape(32U, 18U, 3U), 1, DataType::F32), // Mismatching input feature maps
TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Unsupported weights dimensions
TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Mismatching depth multiplier
@@ -88,7 +126,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // dilation < 1
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16),
+ make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16),
TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(5U, 5U, 2U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32),
@@ -100,7 +138,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip
TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
+ make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
TensorInfo(TensorShape(2U), 1, DataType::F32),
TensorInfo(TensorShape(2U), 1, DataType::F32),
TensorInfo(TensorShape(2U), 1, DataType::F32),
@@ -112,7 +150,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip
TensorInfo(TensorShape(2U), 1, DataType::F32),
TensorInfo(TensorShape(2U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ make("OutputInfo", { TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
@@ -124,7 +162,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip
TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
})),
- framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
+ make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
@@ -136,7 +174,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
})),
- framework::dataset::make("DepthMultiplier", { 1,
+ make("DepthMultiplier", { 1,
1,
1,
3,
@@ -148,7 +186,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip
1,
1,
})),
- framework::dataset::make("Dilation", { Size2D(1U, 1U),
+ make("Dilation", { Size2D(1U, 1U),
Size2D(1U, 1U),
Size2D(1U, 1U),
Size2D(1U, 1U),
@@ -160,7 +198,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip
Size2D(0U, 1U),
Size2D(1U, 1U),
})),
- framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, false, true })),
+ make("Expected", { false, false, false, false, false, false, false, false, false, false, true })),
input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier,dilation, expected)
{
bool is_valid = bool(NEDepthwiseConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false),
@@ -169,7 +207,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip
}
DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights
+ make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights
TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32), // Mismatching input feature maps
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching depth multiplier
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size
@@ -178,7 +216,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip
TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // Patch size bigger than input width
TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // Dilation < 1
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16),
+ make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16),
TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
@@ -187,7 +225,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip
TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
})),
- framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
+ make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
TensorInfo(TensorShape(2U), 1, DataType::F32),
TensorInfo(TensorShape(2U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
@@ -196,7 +234,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip
TensorInfo(TensorShape(16U), 1, DataType::F32),
TensorInfo(TensorShape(16U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
@@ -205,7 +243,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip
TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
})),
- framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
+ make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
@@ -214,7 +252,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
})),
- framework::dataset::make("DepthMultiplier", { 1,
+ make("DepthMultiplier", { 1,
1,
3,
1,
@@ -223,7 +261,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip
2,
2,
})),
- framework::dataset::make("Dilation", { Size2D(1U, 1U),
+ make("Dilation", { Size2D(1U, 1U),
Size2D(1U, 1U),
Size2D(1U, 1U),
Size2D(1U, 1U),
@@ -232,7 +270,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip
Size2D(25U, 1U),
Size2D(0U, 1U),
})),
- framework::dataset::make("Expected", { false, false, false, false, false, false, false, false})),
+ make("Expected", { false, false, false, false, false, false, false, false})),
input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier,dilation, expected)
{
bool is_valid = bool(NEDepthwiseConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, depth_multiplier, ActivationLayerInfo(), dilation));
@@ -244,43 +282,58 @@ template <typename T>
using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>;
template <typename T>
using NEDepthwiseConvolutionLayerMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, true>;
+template <typename T>
+using NEDepthwiseConvolutionLayerVariableWeightsFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, false, false, true>;
TEST_SUITE(Float)
TEST_SUITE(F32)
+
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("In", TensorShape(33U, 27U, 11U, 3U)),
+ make("Weights", Size2D(3U, 4U)),
+ make("Info", PadStrideInfo(1, 2, 0, 1)),
+ make("Dilation", Size2D(2U, 2U)),
+ make("DepthMultiplier", { 5 }),
+ make("DataType", DataType::F32),
+ make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW }),
+ ActivationFunctionsDatasetNightly))
+{
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
+
TEST_SUITE(Generic)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DataType", DataType::F32)),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, NEDepthwiseConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
- framework::dataset::make("DepthMultiplier", { 2 })),
- framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo())))
+ make("DepthMultiplier", { 2 })),
+ make("DataType", DataType::F32)),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("ActivationInfo", ActivationLayerInfo())))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
large_depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataType", DataType::F32)),
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
-
TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f32);
@@ -288,10 +341,10 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>,
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
@@ -301,9 +354,9 @@ TEST_SUITE_END() // Generic
TEST_SUITE(W3x3)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f32);
@@ -311,10 +364,10 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>,
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
@@ -322,9 +375,9 @@ TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f32);
@@ -332,10 +385,10 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>,
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
@@ -346,40 +399,70 @@ TEST_SUITE_END() // W3x3
TEST_SUITE(Optimized)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
+ make("DepthMultiplier", 1)),
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ make("DepthMultiplier", 1)),
+ make("DataType",
+ DataType::F32)),
+ make("DataLayout", { DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout3x3, NEDepthwiseConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo())))
+ make("DepthMultiplier", 1)),
+ make("DataType", DataType::F32)),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("ActivationInfo", ActivationLayerInfo())))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
+ make("DepthMultiplier", 1)),
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall5x5, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
+ make("DepthMultiplier", 1)),
+ make("DataType",
+ DataType::F32)),
+ make("DataLayout", { DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
+ make("DepthMultiplier", 1)),
+ make("DataType",
DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
+{
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsLarge3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ make("DepthMultiplier", 1)),
+ make("DataType",
+ DataType::F32)),
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
@@ -388,22 +471,37 @@ TEST_SUITE_END() // F32
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(F16)
+
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("In", TensorShape(33U, 27U, 11U, 3U)),
+ make("Weights", Size2D(3U, 4U)),
+ make("Info", PadStrideInfo(1, 2, 0, 1)),
+ make("Dilation", Size2D(2U, 2U)),
+ make("DepthMultiplier", { 5 }),
+ make("DataType", DataType::F16),
+ make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW }),
+ ActivationFunctionsDatasetNightly))
+{
+ validate(Accessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+
TEST_SUITE(Generic)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f16, tolerance_num);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
{
validate(Accessor(_target), _reference, tolerance_f16, tolerance_num);
}
@@ -412,9 +510,8 @@ TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DataType", DataType::F16)),
+ make("DataLayout", { DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f16, tolerance_num);
@@ -422,10 +519,9 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, f
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
large_depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataType", DataType::F16)),
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
{
validate(Accessor(_target), _reference, tolerance_f16, tolerance_num);
}
@@ -437,9 +533,9 @@ using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFi
TEST_SUITE(W3x3)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f16);
@@ -447,10 +543,10 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, f
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
{
validate(Accessor(_target), _reference, tolerance_f16);
}
@@ -460,9 +556,9 @@ TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f16);
@@ -470,10 +566,10 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, f
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
large_depth_multipliers),
- framework::dataset::make("DataType",
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
{
validate(Accessor(_target), _reference, tolerance_f16);
}
@@ -484,31 +580,31 @@ TEST_SUITE_END() // W3x3
TEST_SUITE(Optimized)
FIXTURE_DATA_TEST_CASE_NEW(RunSmallW3x3, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
+ make("DepthMultiplier", 1)),
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f16);
}
FIXTURE_DATA_TEST_CASE_NEW(RunSmallW5x5, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
+ make("DepthMultiplier", 1)),
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f16);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLargeW3x3, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
+ make("DepthMultiplier", 1)),
+ make("DataType",
DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
{
validate(Accessor(_target), _reference, tolerance_f16);
}
@@ -526,49 +622,88 @@ using NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture = Depthwise
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
+
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("In", TensorShape(33U, 27U, 11U, 3U)),
+ make("Weights", Size2D(3U, 4U)),
+ make("Info", PadStrideInfo(1, 2, 0, 1)),
+ make("Dilation", Size2D(2U, 2U)),
+ make("DepthMultiplier", { 5 }),
+ make("DataType", DataType::QASYMM8),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) }),
+ make("DstQuantizationInfo", { QuantizationInfo(0.05f, 4) }),
+ make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW }),
+ ActivationFunctionsQuantizedDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+
TEST_SUITE(Generic)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
- depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ input_qinfo_dataset,
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }),
+ make("DataLayout", { DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, NEDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
- framework::dataset::make("DepthMultiplier", { 2 })),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo())))
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+ make("DepthMultiplier", { 2 }),
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
- depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8f, 1) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ input_qinfo_dataset,
+ make("DstQuantizationInfo", { QuantizationInfo(0.8f, 1) }),
+ make("DataLayout", { DataLayout::NHWC }),
+ ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
- large_depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
+ large_depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
@@ -576,47 +711,66 @@ TEST_SUITE_END() // Dilation
TEST_SUITE_END() // Generic
TEST_SUITE(W3x3)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ input_qinfo_dataset,
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }),
+ make("DataLayout", { DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
- large_depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+ large_depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
TEST_SUITE(Dilation)
-
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ input_qinfo_dataset,
+ make("DstQuantizationInfo", { QuantizationInfo(0.7f, 10) }),
+ make("DataLayout", { DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
- large_depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+ large_depth_multipliers,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
@@ -625,48 +779,68 @@ TEST_SUITE_END() // W3x3
TEST_SUITE(Optimized)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ make("DepthMultiplier", 1),
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3WithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ make("DepthMultiplier", 1),
+ make("DataType", DataType::QASYMM8),
+ input_qinfo_dataset,
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }),
+ make("DataLayout", { DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout3x3, NEDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo())))
+ combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ make("DepthMultiplier", 1),
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
- DataType::QASYMM8)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
+ make("DepthMultiplier", 1),
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5WithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
+ make("DepthMultiplier", 1),
+ make("DataType", DataType::QASYMM8),
+ input_qinfo_dataset,
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }),
+ make("DataLayout", { DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
- DataType::QASYMM8)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ make("DepthMultiplier", 1),
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NHWC }),
+ NoActivation))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
@@ -674,143 +848,242 @@ TEST_SUITE_END() // Optimized
TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
-TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
- depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("In", TensorShape(33U, 27U, 11U, 3U)),
+ make("Weights", Size2D(3U, 4U)),
+ make("Info", PadStrideInfo(1, 2, 0, 1)),
+ make("Dilation", Size2D(2U, 2U)),
+ make("DepthMultiplier", { 5 }),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) }),
+ make("DstQuantizationInfo", { QuantizationInfo(0.05f, 4) }),
+ make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW }),
+ ActivationFunctionsQuantizedDataset))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
+TEST_SUITE(Generic)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ NoActivation))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ input_qinfo_dataset,
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
- depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8f, 1) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ NoActivation))
{
- validate(Accessor(_target), _reference, tolerance_qasymm8);
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ input_qinfo_dataset,
+ make("DstQuantizationInfo", { QuantizationInfo(0.8f, 1) }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
- large_depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+ combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
+ large_depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW }),
+ NoActivation))
{
- validate(Accessor(_target), _reference, tolerance_qasymm8);
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
TEST_SUITE_END() // Dilation
TEST_SUITE_END() // Generic
TEST_SUITE(W3x3)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ NoActivation))
{
- validate(Accessor(_target), _reference, tolerance_qasymm8);
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ input_qinfo_dataset,
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
- large_depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+ combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+ large_depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW }),
+ NoActivation))
{
- validate(Accessor(_target), _reference, tolerance_qasymm8);
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ NoActivation))
{
- validate(Accessor(_target), _reference, tolerance_qasymm8);
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+ depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ input_qinfo_dataset,
+ make("DstQuantizationInfo", { QuantizationInfo(0.7f, 10) }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
- large_depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+ combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+ large_depth_multipliers,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW }),
+ NoActivation))
{
- validate(Accessor(_target), _reference, tolerance_qasymm8);
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
TEST_SUITE_END() // Dilation
TEST_SUITE_END() // W3x3
TEST_SUITE(Optimized)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
- DataType::QASYMM8_SIGNED)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ make("DepthMultiplier", 1),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ NoActivation))
{
- validate(Accessor(_target), _reference, tolerance_qasymm8);
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3WithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ make("DepthMultiplier", 1),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ input_qinfo_dataset,
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
- DataType::QASYMM8_SIGNED)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+ combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
+ make("DepthMultiplier", 1),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ NoActivation))
{
- validate(Accessor(_target), _reference, tolerance_qasymm8);
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5WithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
+ make("DepthMultiplier", 1),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ input_qinfo_dataset,
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+ ActivationFunctionsQuantizedSmallDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
- DataType::QASYMM8_SIGNED)),
- input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- ActivationFunctionsDataset))
+ combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ make("DepthMultiplier", 1),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationInfo,
+ IgnoredQuantizationInfo,
+ make("DataLayout", { DataLayout::NCHW }),
+ NoActivation))
{
- validate(Accessor(_target), _reference, tolerance_qasymm8);
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
TEST_SUITE_END() // Optimized
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE(QSYMM8_PER_CHANNEL)
+
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("In", TensorShape(33U, 27U, 11U, 3U)),
+ make("Weights", Size2D(3U, 4U)),
+ make("Info", PadStrideInfo(1, 2, 0, 1)),
+ make("Dilation", Size2D(2U, 2U)),
+ make("DepthMultiplier", { 5 }),
+ make("InputDataType", DataType::QASYMM8),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL),
+ make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) }),
+ make("DstQuantizationInfo", { QuantizationInfo(0.05f, 4) }),
+ make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW }),
+ ActivationFunctionsQuantizedDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+
TEST_SUITE(Generic)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("InputDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("InputDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -820,11 +1093,11 @@ TEST_SUITE(Dilation)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("InputDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("InputDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -832,12 +1105,12 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedSymmetr
FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
depth_multipliers),
- framework::dataset::make("InputDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("InputDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
@@ -847,25 +1120,25 @@ TEST_SUITE_END() // Generic
TEST_SUITE(Optimized)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("InputDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("DepthMultiplier", 1)),
+ make("InputDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
+ make("DataLayout", { DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("InputDataType", DataType::QASYMM8)),
- framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+ make("DepthMultiplier", 1)),
+ make("InputDataType", DataType::QASYMM8)),
+ make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
input_qinfo_dataset),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- ActivationFunctionsDataset))
+ make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
+ make("DataLayout", { DataLayout::NHWC })),
+ make("ActivationInfo", { ActivationLayerInfo() })))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp b/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp
index ddf3faacb6..221fc5d249 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h"
+#include "arm_compute/core/utils/StringUtils.h"
+#include "src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/NEON/Helper.h"
#include "tests/framework/Macros.h"
@@ -134,6 +135,44 @@ TEST_CASE(ValidateNoPadding, framework::DatasetMode::ALL)
ARM_COMPUTE_EXPECT(dst.info()->padding().empty(), framework::LogLevel::ERRORS);
}
+TEST_SUITE(KERNEL_SELECTION)
+DATA_TEST_CASE(KernelSelection_mul_and_add, framework::DatasetMode::ALL,
+ combine(combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::QASYMM8_SIGNED,
+ DataType::QASYMM8,
+ DataType::QSYMM8_PER_CHANNEL
+ })),
+ framework::dataset::make("DataType_per_channel", { DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED
+ })),
+ cpu_ext, data_type, data_type_per_channel)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuDepthwiseConv2dNativeKernel::get_implementation(
+        DepthwiseConv2dNativeDataTypeISASelectorData{ data_type, data_type_per_channel, cpu_isa },
+        cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string per_channel_str = "_";
+ if (data_type == DataType::QSYMM8_PER_CHANNEL)
+ {
+        per_channel_str = "_" + cpu_impl_dt(data_type_per_channel) + "_";
+ }
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + per_channel_str + "deptwiseconv2dnative";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // KERNEL_SELECTION
+
TEST_SUITE(Float)
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CpuDepthwiseConvolutionNativeFixture<float>, framework::DatasetMode::ALL,
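For orientation, the KernelSelection_mul_and_add case above boils down to a string comparison against a name assembled from the CPU extension, the data type and (for QSYMM8_PER_CHANNEL) the per-channel data type. A minimal stand-alone sketch of that assembly; the concrete data-type strings in the examples ("fp32", "qp8", "qu8") are assumptions for illustration only, not taken from the library:

// Sketch only: mirrors the expected-name construction in KernelSelection_mul_and_add.
#include <string>

std::string expected_kernel_name(const std::string &cpu_ext_lower, const std::string &dt_str,
                                 const std::string &per_channel_dt_str, bool is_per_channel)
{
    // QSYMM8_PER_CHANNEL kernels carry an extra per-channel data-type token in their name
    const std::string per_channel_str = is_per_channel ? "_" + per_channel_dt_str + "_" : "_";
    return cpu_ext_lower + "_" + dt_str + per_channel_str + "deptwiseconv2dnative";
}

// expected_kernel_name("neon", "fp32", "", false)  -> "neon_fp32_deptwiseconv2dnative"
// expected_kernel_name("neon", "qp8", "qu8", true) -> "neon_qp8_qu8_deptwiseconv2dnative" (assumed strings)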
diff --git a/tests/validation/NEON/DilatedConvolutionLayer.cpp b/tests/validation/NEON/DilatedConvolutionLayer.cpp
index 2f0fce2ce0..fbfe8b8a7a 100644
--- a/tests/validation/NEON/DilatedConvolutionLayer.cpp
+++ b/tests/validation/NEON/DilatedConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/cpu/operators/CpuConv2d.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/DilatedConvolutionLayerDataset.h"
@@ -49,7 +50,7 @@ const AbsoluteTolerance<float> abs_tolerance_f16(0.3f);
const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */
constexpr float tolerance_num_f16 = 0.07f; /**< Tolerance number for FP16 */
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+constexpr AbsoluteTolerance<int32_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
/** CNN data types */
const auto CNNDataTypes = framework::dataset::make("DataType",
@@ -96,7 +97,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
framework::dataset::make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })),
input_info, weights_info, output_info, conv_info, dilation, expected)
{
- ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(false),
+ ConvolutionMethod is_valid = cpu::CpuConv2d::get_convolution_method(&input_info.clone()->set_is_resizable(false),
&weights_info.clone()->set_is_resizable(false),
&output_info.clone()->set_is_resizable(false),
conv_info, WeightsInfo(), dilation);
@@ -161,13 +162,18 @@ template <typename T>
using NEGEMMDilatedConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEGEMMConvolutionLayer, T>;
TEST_SUITE(Quantized)
+/// @note Every asymmetric quantized test without a fused activation has its quantization info ignored.
+/// This is because, instead of using the same quantization information for all the tensors, the fixture
+/// generates separate quantization info for each input and for the output tensor.
+/// Once dynamic quantization is also supported in the presence of an activation, the explicit
+/// quantization info can be removed.
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(),
framework::dataset::make("ReshapeWeights", { true })),
framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("IgnoredQuantizationInfo", { QuantizationInfo() })),
framework::dataset::make("ActivationLayerInfo", ActivationLayerInfo())))
{
// Validate output
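The @note above describes the fixture's dynamic-quantization behaviour. A stand-alone sketch of that rule, using stand-in types rather than the library's (the real fixture logic is not part of this patch):

// Stand-in types only; illustrates "ignore the dataset's quantization info when no activation is fused".
struct QInfo { float scale; int offset; };

QInfo effective_qinfo(bool has_fused_activation, QInfo from_dataset)
{
    if(has_fused_activation)
    {
        return from_dataset;   // explicit values from the dataset are honoured
    }
    return QInfo{ 0.25f, 5 };  // assumed: the real fixture generates independent values per tensor
}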
@@ -178,7 +184,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMDilatedConvolutionLayerQuantizedFixture<u
framework::dataset::make("ReshapeWeights", { true })),
framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataLayout", { DataLayout::NCHW })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("IgnoredQuantizationInfo", { QuantizationInfo() })),
framework::dataset::make("ActivationLayerInfo", ActivationLayerInfo())))
{
// Validate output
diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp
index 368aef216a..0779c9d388 100644
--- a/tests/validation/NEON/DirectConvolutionLayer.cpp
+++ b/tests/validation/NEON/DirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,9 +23,12 @@
*/
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/StringUtils.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuDirectConv2dKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/ShapeDatasets.h"
@@ -70,8 +73,8 @@ const auto data_pad_f16 = concat(combine(framework::dataset::make("PadX", { 0, 1
framework::dataset::make("KernelSize", 1))));
const auto data_f32 = combine(datasets::SmallDirectConvolutionShapes(),
- combine(framework::dataset::make("StrideX", { 1, 2, 3 }),
- combine(framework::dataset::make("StrideY", { 1, 2, 3 }),
+ combine(framework::dataset::make("StrideX", { 1, 2, 3, 4 }),
+ combine(framework::dataset::make("StrideY", { 1, 2, 3, 4 }),
data_pad_f32)));
const auto data_f16 = combine(datasets::SmallDirectConvolutionShapes(),
@@ -87,17 +90,25 @@ const auto data_prec = combine(datasets::SmallDirectConvolutionShapes(),
framework::dataset::make("KernelSize", 3))))));
const auto data9x9 = combine(datasets::SmallDirectConvolutionShapes(),
- combine(framework::dataset::make("StrideX", { 1 }),
- combine(framework::dataset::make("StrideY", { 1 }),
+ combine(framework::dataset::make("StrideX", { 1, 2, 3 }),
+ combine(framework::dataset::make("StrideY", { 1, 2, 3 }),
combine(framework::dataset::make("PadX", { 0, 2 }),
combine(framework::dataset::make("PadY", { 0, 3 }),
framework::dataset::make("KernelSize", 9))))));
-const auto data_f32_nightly = combine(data_f32, framework::dataset::make("NumKernels", { 1, 4 }));
-const auto data_f16_nightly = combine(data_f16, framework::dataset::make("NumKernels", { 1, 4 }));
+const auto data8x8 = combine(datasets::SmallDirectConvolutionShapes(),
+ combine(framework::dataset::make("StrideX", { 1, 2, 3 }),
+ combine(framework::dataset::make("StrideY", { 1, 2, 3 }),
+ combine(framework::dataset::make("PadX", { 0 }),
+ combine(framework::dataset::make("PadY", { 0 }),
+ framework::dataset::make("KernelSize", 8))))));
+
+const auto data_f32_nightly = combine(data_f32, framework::dataset::make("NumKernels", { 1, 4, 5 }));
+const auto data_f16_nightly = combine(data_f16, framework::dataset::make("NumKernels", { 1, 4, 5 }));
const auto data_precommit = combine(data_prec, framework::dataset::make("NumKernels", { 1 }));
const auto data_precommit9x9 = combine(data9x9, framework::dataset::make("NumKernels", { 4 }));
+const auto data_precommit8x8 = combine(data8x8, framework::dataset::make("NumKernels", { 4 }));
/* The following test is from a real use case that made DirectConvolution
 * overflow in terms of its tensor indexing. This test case is using
@@ -172,17 +183,52 @@ TEST_CASE(NoBias, framework::DatasetMode::PRECOMMIT)
validate(Accessor(dst), ref_dst);
}
+DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL,
+ concat(combine(combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ combine(combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F16 })),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW }))),
+ cpu_ext, data_type, data_layout)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuDirectConv2dKernel::get_implementation(DataTypeDataLayoutISASelectorData{ data_type, data_layout, cpu_isa }, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string data_layout_str;
+ if(data_layout == DataLayout::NCHW)
+ {
+ data_layout_str = "nchw";
+ }
+ else
+ {
+ data_layout_str = "nhwc";
+ }
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_" + data_layout_str + "_directconv2d";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
+
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching input feature maps
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid: Mismatching data type input/weights
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid: Mismatching input feature maps
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported kernel width
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Non-rectangular weights dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported non-rectangular weights dimensions
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights dimensions
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid stride
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported stride
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported biases size
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported biases dimensions
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size
}),
framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16),
@@ -319,13 +365,23 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolutionLayerFixture<float>, framewo
validate(Accessor(_target), _reference, tolerance_fp32);
}
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEDirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit,
- framework::dataset::make("DataType", DataType::F32)),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ framework::dataset::make("DataType", DataType::F32)),
+ ActivationFunctionsDataset),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32);
}
+
+FIXTURE_DATA_TEST_CASE(RunSmall8x8, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit8x8, framework::dataset::make("DataType",
+ DataType::F32)),
+ ActivationFunctionsDataset),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32);
+}
+
FIXTURE_DATA_TEST_CASE(RunSmall9x9, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit9x9, framework::dataset::make("DataType",
DataType::F32)),
ActivationFunctionsDataset),
diff --git a/tests/validation/NEON/ElementwiseAbsoluteValue.cpp b/tests/validation/NEON/ElementwiseAbsoluteValue.cpp
index ccde670034..0667ac73f9 100644
--- a/tests/validation/NEON/ElementwiseAbsoluteValue.cpp
+++ b/tests/validation/NEON/ElementwiseAbsoluteValue.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,6 +46,13 @@ RelativeTolerance<float> tolerance_fp32(0.000001f);
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<float> tolerance_fp16(0.01f);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#if defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0);
+#else // #if !defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is a difference of 1 because quantization in the reference uses rounding policy "TO_NEAREST_UP", whereas the armv7a Neon kernel uses "TO_ZERO"
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
+#endif // #if !defined(__aarch64__)
} // namespace
TEST_SUITE(NEON)
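The tolerance comments above hinge on the two rounding policies disagreeing by at most one LSB. A small arithmetic illustration (plain C++, not the library's quantization helpers):

#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
    const float scaled = 10.7f;                                              // value already scaled and offset
    const auto nearest_up = static_cast<uint8_t>(std::floor(scaled + 0.5f)); // 11: reference, TO_NEAREST_UP
    const auto to_zero    = static_cast<uint8_t>(scaled);                    // 10: armv7a kernel, TO_ZERO
    std::printf("%d vs %d\n", int{nearest_up}, int{to_zero});                // differ by exactly 1
    return 0;
}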
@@ -53,6 +60,9 @@ TEST_SUITE(AbsLayer)
template <typename T>
using NEAbsLayerFixture = AbsValidationFixture<Tensor, Accessor, NEAbsLayer, T>;
+template <typename T>
+using NEAbsLayerQuantizedFixture = AbsQuantizedValidationFixture<Tensor, Accessor, NEAbsLayer, T>;
+
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
@@ -107,6 +117,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEAbsLayerFixture<int32_t>, framework::DatasetM
TEST_SUITE_END() // S32
TEST_SUITE_END() // Integer
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(0.2, -3) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(0.5, 10) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(0.075, 6) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(0.1, -7) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // AbsLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/ElementwiseDivision.cpp b/tests/validation/NEON/ElementwiseDivision.cpp
index 8abccb2ed6..95db4ad5fd 100644
--- a/tests/validation/NEON/ElementwiseDivision.cpp
+++ b/tests/validation/NEON/ElementwiseDivision.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,7 @@ namespace validation
namespace
{
RelativeTolerance<float> tolerance_fp32(0.000001f);
-AbsoluteTolerance<int> tolerance_zero_s32(1); // Tolerance for S32 division
+AbsoluteTolerance<int> tolerance_zero_s32(0); // Tolerance for S32 division
/** Input data sets **/
const auto ElementwiseDivisionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32),
@@ -56,6 +56,8 @@ const auto ElementwiseDivisionFP16Dataset = combine(combine(framewo
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
const auto ElementwiseDivisionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("DataType", DataType::F32));
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
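+// "InPlace" == true makes the fixture reuse one of the input tensors as the destination; broadcast runs stay
+// out-of-place except for TinyShapesBroadcastInplace, whose shapes are chosen so that in-place execution is valid.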
} // namespace
TEST_SUITE(NEON)
@@ -105,14 +107,16 @@ using CpuElementwiseDivisionBroadcastDynamicShapeFixture = ArithmeticDivisionBro
TEST_SUITE(F32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CpuElementwiseDivisionDynamicShapeFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseDivisionFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CpuElementwiseDivisionDynamicShapeFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseDivisionFP32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
}
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CpuElementwiseDivisionBroadcastDynamicShapeFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapesBroadcast(),
- ElementwiseDivisionFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CpuElementwiseDivisionBroadcastDynamicShapeFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+ ElementwiseDivisionFP32Dataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
@@ -124,7 +128,8 @@ TEST_SUITE_END() // DynamicShape
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(F16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseDivisionFP16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseDivisionFP16Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp16, 0.01);
@@ -133,7 +138,8 @@ TEST_SUITE_END() // F16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE(F32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseDivisionFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseDivisionFP32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
@@ -142,8 +148,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<float>, framework:
template <typename T>
using NEElementwiseDivisionBroadcastFixture = ArithmeticDivisionBroadcastValidationFixture<Tensor, Accessor, NEElementwiseDivision, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseDivisionBroadcastFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapesBroadcast(),
- ElementwiseDivisionFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseDivisionBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+ ElementwiseDivisionFP32Dataset),
+ OutOfPlaceDataSet))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEElementwiseDivisionBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcastInplace(),
+ ElementwiseDivisionFP32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
@@ -153,7 +167,8 @@ TEST_SUITE_END() // Float
TEST_SUITE(Integer)
TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<int32_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseDivisionS32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<int32_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseDivisionS32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_zero_s32);
@@ -162,7 +177,7 @@ TEST_SUITE_END() // S32
TEST_SUITE_END() // Integer
TEST_SUITE_END() // ElementwiseDivision
-TEST_SUITE_END() // Neon
+TEST_SUITE_END() // NEON
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/NEON/ElementwiseExpLayer.cpp b/tests/validation/NEON/ElementwiseExpLayer.cpp
index f9e5f39989..31cd78626f 100644
--- a/tests/validation/NEON/ElementwiseExpLayer.cpp
+++ b/tests/validation/NEON/ElementwiseExpLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,6 +46,15 @@ RelativeTolerance<float> tolerance_fp32(0.000001f);
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<float> tolerance_fp16(0.01f);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#if defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0);
+#else // #if !defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is a difference of 1 because quantization in the reference uses the rounding policy "TO_NEAREST_UP", whereas the armv7a NEON kernel uses "TO_ZERO"
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
+#endif // #if !defined(__aarch64__)
+
} // namespace
TEST_SUITE(NEON)
TEST_SUITE(ExpLayer)
@@ -53,6 +62,9 @@ TEST_SUITE(ExpLayer)
template <typename T>
using NEExpLayerFixture = ExpValidationFixture<Tensor, Accessor, NEExpLayer, T>;
+template <typename T>
+using NEExpLayerQuantizedFixture = ExpQuantizedValidationFixture<Tensor, Accessor, NEExpLayer, T>;
+
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
@@ -82,6 +94,32 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEExpLayerFixture<float>, framework::DatasetMod
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEExpLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(0.01, 0) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(0.003, 10) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEExpLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(0.02, -1) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(0.002, -2) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // ExpLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/ElementwiseKernelSelection.cpp b/tests/validation/NEON/ElementwiseKernelSelection.cpp
new file mode 100644
index 0000000000..7990a51936
--- /dev/null
+++ b/tests/validation/NEON/ElementwiseKernelSelection.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2022-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/StringUtils.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuElementwiseKernel.h"
+#include "src/cpu/kernels/CpuElementwiseUnaryKernel.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(KernelSelection)
+
+DATA_TEST_CASE(KernelSelection_elementwise_unary, framework::DatasetMode::ALL, concat(
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::S32
+ })),
+ combine(framework::dataset::make("CpuExt", std::string("SVE")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::S32
+ }))),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.sve = (cpu_ext == "SVE");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuElementwiseUnaryKernel::get_implementation(DataTypeISASelectorData{ data_type, cpu_isa }, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_elementwise_unary";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
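+// For instance, the case above expects "sve_fp32_elementwise_unary" for CpuExt == "SVE" and
+// DataType::F32 (assuming cpu_impl_dt() maps F32 to "fp32"); the arithmetic and comparison
+// suites below follow the same "<ext>_<dt>_<op>" naming pattern.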
+
+DATA_TEST_CASE(KernelSelection_elementwise_arithmetic, framework::DatasetMode::ALL, concat(concat(
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::S32,
+ DataType::S16,
+ DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED
+ })),
+ combine(framework::dataset::make("CpuExt", std::string("SVE")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::S32,
+ DataType::S16
+ }))),
+ combine(framework::dataset::make("CpuExt", std::string("SVE2")),
+ framework::dataset::make("DataType", { DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED
+ }))),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.sve = (cpu_ext == "SVE");
+ cpu_isa.sve2 = (cpu_ext == "SVE2");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuArithmeticKernel::get_implementation(
+ ElementwiseDataTypeISASelectorData{ data_type, cpu_isa, static_cast<int>(ArithmeticOperation::ADD) },
+ cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_arithmetic";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
+
+DATA_TEST_CASE(KernelSelection_elementwise_comparison, framework::DatasetMode::ALL, concat(concat(
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::S32,
+ DataType::S16,
+ DataType::U8,
+ DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED
+ })),
+ combine(framework::dataset::make("CpuExt", std::string("SVE")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::S32,
+ DataType::S16,
+ DataType::U8
+ }))),
+ combine(framework::dataset::make("CpuExt", std::string("SVE2")),
+ framework::dataset::make("DataType", { DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED
+ }))),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.sve = (cpu_ext == "SVE");
+ cpu_isa.sve2 = (cpu_ext == "SVE2");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuComparisonKernel::get_implementation(
+ ElementwiseDataTypeISASelectorData{ data_type, cpu_isa, static_cast<int>(ComparisonOperation::Equal) },
+ cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_comparison";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
+
+TEST_SUITE_END() // KernelSelection
+TEST_SUITE_END() // Neon
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/ElementwiseLog.cpp b/tests/validation/NEON/ElementwiseLog.cpp
index 3aa7fb3665..1175903dac 100644
--- a/tests/validation/NEON/ElementwiseLog.cpp
+++ b/tests/validation/NEON/ElementwiseLog.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,6 +46,15 @@ RelativeTolerance<float> tolerance_fp32(0.000001f);
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<float> tolerance_fp16(0.01f);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#if defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0);
+#else // #if !defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is a difference of 1 because quantization in the reference uses the rounding policy "TO_NEAREST_UP", whereas the armv7a NEON kernel uses "TO_ZERO"
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
+#endif // #if !defined(__aarch64__)
+
} // namespace
TEST_SUITE(NEON)
TEST_SUITE(LogLayer)
@@ -53,6 +62,9 @@ TEST_SUITE(LogLayer)
template <typename T>
using NELogLayerFixture = LogValidationFixture<Tensor, Accessor, NELogLayer, T>;
+template <typename T>
+using NELogLayerQuantizedFixture = LogQuantizedValidationFixture<Tensor, Accessor, NELogLayer, T>;
+
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
@@ -88,6 +100,33 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NELogLayerFixture<float>, framework::DatasetMod
}
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NELogLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(10.5, 0), QuantizationInfo(0.5, -10) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(5, 10) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NELogLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(0.75, -128) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(12.5, -2) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // LogLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/ElementwiseMax.cpp b/tests/validation/NEON/ElementwiseMax.cpp
index 4bc263184e..61421ab3e5 100644
--- a/tests/validation/NEON/ElementwiseMax.cpp
+++ b/tests/validation/NEON/ElementwiseMax.cpp
@@ -62,6 +62,8 @@ const auto ElementwiseMaxFP16Dataset = combine(combine(framework::dataset::make(
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
const auto ElementwiseMaxFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("DataType", DataType::F32));
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
} // namespace
TEST_SUITE(NEON)
@@ -111,7 +113,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
// *INDENT-ON*
TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseMaxS32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseMaxS32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -119,7 +122,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<int32_t>, framework::Da
TEST_SUITE_END() // S32
TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMaxS16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxS16Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -131,11 +135,12 @@ using NEElementwiseMaxQuantizedFixture = ElementwiseMaxValidationQuantizedFixtur
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseMaxQASYMM8Dataset),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
@@ -144,11 +149,13 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxQuantizedFixture<uint8_t>, fram
template <typename T>
using NEElementwiseMaxQuantizedBroadcastFixture = ElementwiseMaxQuantizedBroadcastValidationFixture<Tensor, Accessor, NEElementwiseMax, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMaxQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
- ElementwiseMaxQASYMM8Dataset),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMaxQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
+ ElementwiseMaxQASYMM8Dataset),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -156,16 +163,26 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMaxQuantizedBroadcastFixt
TEST_SUITE_END()
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseMaxQASYMM8SignedDataset),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(10.f, 20) })),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f, 0) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f, -27) })))
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f, -27) })),
+ OutOfPlaceDataSet))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallInPlace, NEElementwiseMaxQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ ElementwiseMaxQASYMM8SignedDataset),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(10.f, -20) })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(10.f, -20) })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(10.f, -20) })),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
-
TEST_SUITE_END()
TEST_SUITE_END()
@@ -173,7 +190,8 @@ TEST_SUITE_END()
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(F16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMaxFP16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP16Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -182,7 +200,8 @@ TEST_SUITE_END() // F16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE(F32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMaxFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -190,8 +209,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<float>, framework::Data
template <typename T>
using NEElementwiseMaxBroadcastFixture = ElementwiseMaxBroadcastValidationFixture<Tensor, Accessor, NEElementwiseMax, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMaxBroadcastFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapesBroadcast(),
- ElementwiseMaxFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMaxBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+ ElementwiseMaxFP32Dataset),
+ OutOfPlaceDataSet))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEElementwiseMaxBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcastInplace(),
+ ElementwiseMaxFP32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
diff --git a/tests/validation/NEON/ElementwiseMin.cpp b/tests/validation/NEON/ElementwiseMin.cpp
index 3836b90308..a134eb354d 100644
--- a/tests/validation/NEON/ElementwiseMin.cpp
+++ b/tests/validation/NEON/ElementwiseMin.cpp
@@ -62,6 +62,8 @@ const auto ElementwiseMinFP16Dataset = combine(combine(framework::dataset::make(
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
const auto ElementwiseMinFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("DataType", DataType::F32));
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
} // namespace
TEST_SUITE(NEON)
@@ -110,7 +112,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
// *INDENT-ON*
TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseMinS32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseMinS32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -118,7 +121,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<int32_t>, framework::Da
TEST_SUITE_END() // S32
TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMinS16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinS16Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -133,23 +137,34 @@ TEST_SUITE(QASYMM8)
template <typename T>
using NEElementwiseMinQuantizedBroadcastFixture = ElementwiseMinQuantizedBroadcastValidationFixture<Tensor, Accessor, NEElementwiseMin, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMinQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
- ElementwiseMinQASYMM8Dataset),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMinQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
+ ElementwiseMinQASYMM8Dataset),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEElementwiseMinQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(),
+ ElementwiseMinQASYMM8Dataset),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 20) })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 20) })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 20) })),
+ InPlaceDataSet))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseMinQASYMM8Dataset),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))
-
- )
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
@@ -157,11 +172,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinQuantizedFixture<uint8_t>, fram
TEST_SUITE_END()
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseMaxQASYMM8SignedDataset),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(10.f, 20) })),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f, 0) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f, -27) })))
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f, -27) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
@@ -174,7 +190,8 @@ TEST_SUITE_END()
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(F16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMinFP16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP16Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -183,7 +200,8 @@ TEST_SUITE_END() // F16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE(F32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMinFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -192,8 +210,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<float>, framework::Data
template <typename T>
using NEElementwiseMinBroadcastFixture = ElementwiseMinBroadcastValidationFixture<Tensor, Accessor, NEElementwiseMin, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMinBroadcastFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapesBroadcast(),
- ElementwiseMinFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMinBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+ ElementwiseMinFP32Dataset),
+ OutOfPlaceDataSet))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEElementwiseMinBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcastInplace(),
+ ElementwiseMinFP32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
diff --git a/tests/validation/NEON/ElementwiseNegation.cpp b/tests/validation/NEON/ElementwiseNegation.cpp
index 0b63588d8a..5b8ae8fc64 100644
--- a/tests/validation/NEON/ElementwiseNegation.cpp
+++ b/tests/validation/NEON/ElementwiseNegation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,6 +46,13 @@ RelativeTolerance<float> tolerance_fp32(0.000001f);
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<float> tolerance_fp16(0.01f);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#if defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0);
+#else // #if !defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is a difference of 1 because quantization in the reference uses the rounding policy "TO_NEAREST_UP", whereas the armv7a NEON kernel uses "TO_ZERO"
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
+#endif // #if !defined(__aarch64__)
} // namespace
TEST_SUITE(NEON)
TEST_SUITE(NegLayer)
@@ -53,6 +60,9 @@ TEST_SUITE(NegLayer)
template <typename T>
using NENegLayerFixture = NegValidationInPlaceFixture<Tensor, Accessor, NENegLayer, T>;
+template <typename T>
+using NENegLayerQuantizedFixture = NegQuantizedValidationFixture<Tensor, Accessor, NENegLayer, T>;
+
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
@@ -113,6 +123,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NENegLayerFixture<int32_t>, framework::DatasetM
TEST_SUITE_END() // S32
TEST_SUITE_END() // Integer
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(0.2, -3) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(0.5, 10) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(0.075, 6) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(0.1, -7) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // NegLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/ElementwisePower.cpp b/tests/validation/NEON/ElementwisePower.cpp
index 4305387c5f..9ac9eec280 100644
--- a/tests/validation/NEON/ElementwisePower.cpp
+++ b/tests/validation/NEON/ElementwisePower.cpp
@@ -51,6 +51,8 @@ const auto ElementwisePowerFP16Dataset = combine(combine(framework:
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
const auto ElementwisePowerFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("DataType", DataType::F32));
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
} // namespace
TEST_SUITE(NEON)
@@ -91,7 +93,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(F16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwisePowerFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwisePowerFP16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwisePowerFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwisePowerFP16Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp16, 0.01);
@@ -101,13 +104,15 @@ TEST_SUITE_END() // F16
TEST_SUITE(F32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwisePowerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwisePowerFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwisePowerFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwisePowerFP32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEElementwisePowerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), ElementwisePowerFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEElementwisePowerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ElementwisePowerFP32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
@@ -116,15 +121,23 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEElementwisePowerFixture<float>, framework::Da
template <typename T>
using NEElementwisePowerBroadcastFixture = ElementwisePowerBroadcastValidationFixture<Tensor, Accessor, NEElementwisePower, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwisePowerBroadcastFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapesBroadcast(),
- ElementwisePowerFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwisePowerBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+ ElementwisePowerFP32Dataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
}
-
-FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEElementwisePowerBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapesBroadcast(),
- ElementwisePowerFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEElementwisePowerBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcastInplace(),
+ ElementwisePowerFP32Dataset),
+ InPlaceDataSet))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEElementwisePowerBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapesBroadcast(),
+ ElementwisePowerFP32Dataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
diff --git a/tests/validation/NEON/ElementwiseRound.cpp b/tests/validation/NEON/ElementwiseRound.cpp
index d2f0b456a0..620618cb0b 100644
--- a/tests/validation/NEON/ElementwiseRound.cpp
+++ b/tests/validation/NEON/ElementwiseRound.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,12 +40,20 @@ namespace test
{
namespace validation
{
+namespace
+{
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0);
+} // namespace
TEST_SUITE(NEON)
TEST_SUITE(RoundLayer)
template <typename T>
using NERoundLayerFixture = RoundValidationFixture<Tensor, Accessor, NERoundLayer, T>;
+template <typename T>
+using NERoundLayerQuantizedFixture = RoundQuantizedValidationFixture<Tensor, Accessor, NERoundLayer, T>;
+
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
@@ -81,6 +89,33 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NERoundLayerFixture<float>, framework::DatasetM
}
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NERoundLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(0.2, -3) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(0.5, 10) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NERoundLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(0.075, 6) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(0.1, -7) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // RoundLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/ElementwiseRsqrtLayer.cpp b/tests/validation/NEON/ElementwiseRsqrtLayer.cpp
index 2d52183b15..80788c893f 100644
--- a/tests/validation/NEON/ElementwiseRsqrtLayer.cpp
+++ b/tests/validation/NEON/ElementwiseRsqrtLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,6 +46,13 @@ RelativeTolerance<float> tolerance_fp32(0.000001f);
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<float> tolerance_fp16(0.01f);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#if defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0);
+#else // #if !defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is a difference of 1 because quantization in the reference uses the rounding policy "TO_NEAREST_UP", whereas the armv7a NEON kernel uses "TO_ZERO"
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
+#endif // #if !defined(__aarch64__)
} // namespace
TEST_SUITE(NEON)
TEST_SUITE(RsqrtLayer)
@@ -72,6 +79,9 @@ TEST_SUITE_END() // DynamicShape
template <typename T>
using NERsqrtLayerFixture = RsqrtValidationFixture<Tensor, Accessor, NERsqrtLayer, T>;
+template <typename T>
+using NERsqrtLayerQuantizedFixture = RsqrtQuantizedValidationFixture<Tensor, Accessor, NERsqrtLayer, T>;
+
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
@@ -102,6 +112,32 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NERsqrtLayerFixture<float>, framework::DatasetM
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NERsqrtLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(20, 0) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(0.5, 10) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NERsqrtLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(25, -128) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(0.1, -7) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // RsqrtLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/ElementwiseSin.cpp b/tests/validation/NEON/ElementwiseSin.cpp
index 06775c0690..9c2d7ae268 100644
--- a/tests/validation/NEON/ElementwiseSin.cpp
+++ b/tests/validation/NEON/ElementwiseSin.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,6 +46,8 @@ AbsoluteTolerance<float> tolerance_fp32(0.00001f);
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
AbsoluteTolerance<float> tolerance_fp16(0.0005f);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0);
} // namespace
TEST_SUITE(NEON)
TEST_SUITE(SinLayer)
@@ -53,6 +55,9 @@ TEST_SUITE(SinLayer)
template <typename T>
using NESinLayerFixture = SinValidationFixture<Tensor, Accessor, NESinLayer, T>;
+template <typename T>
+using NESinLayerQuantizedFixture = SinQuantizedValidationFixture<Tensor, Accessor, NESinLayer, T>;
+
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
@@ -89,6 +94,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NESinLayerFixture<float>, framework::DatasetMod
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NESinLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(0.2, -3) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(200, 10) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NESinLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("InputQInfo", { QuantizationInfo(0.07, 6) })),
+ framework::dataset::make("OutputQInfo", { QuantizationInfo(123, -7) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE_END() // Quantized
TEST_SUITE_END() // SinLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/ElementwiseSquareDiff.cpp b/tests/validation/NEON/ElementwiseSquareDiff.cpp
index 069cbbd7fa..9a86b541de 100644
--- a/tests/validation/NEON/ElementwiseSquareDiff.cpp
+++ b/tests/validation/NEON/ElementwiseSquareDiff.cpp
@@ -68,6 +68,8 @@ const auto ElementwiseSquaredDiffFP16Dataset = combine(combine(framework::datase
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
const auto ElementwiseSquaredDiffFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("DataType", DataType::F32));
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
} // namespace
TEST_SUITE(NEON)
@@ -109,7 +111,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
// *INDENT-ON*
TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseSquaredDiffS32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffS32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -117,7 +120,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<int32_t>, frame
TEST_SUITE_END() // S32
TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseSquaredDiffS16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffS16Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -129,13 +133,12 @@ using NEElementwiseSquaredDiffQuantizedFixture = ElementwiseSquaredDiffValidatio
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseSquaredDiffQASYMM8Dataset),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))
-
- )
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
@@ -144,11 +147,23 @@ template <typename T>
using NEElementwiseSquaredDiffQuantizedBroadcastFixture = ElementwiseSquaredDiffQuantizedBroadcastValidationFixture<Tensor, Accessor, NEElementwiseSquaredDiff, T>;
FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseSquaredDiffQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
- ElementwiseSquaredDiffQASYMM8Dataset),
+ combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
+ ElementwiseSquaredDiffQASYMM8Dataset),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+ OutOfPlaceDataSet))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEElementwiseSquaredDiffQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(),
+ ElementwiseSquaredDiffQASYMM8Dataset),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -156,11 +171,12 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseSquaredDiffQuantizedBroad
TEST_SUITE_END()
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseSquaredDiffQASYMM8SignedDataset),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f, 5) })),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(.5f, 5) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(.2f, 5) })))
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(.2f, 5) })),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -171,7 +187,8 @@ TEST_SUITE_END()
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(F16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP16Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp16, 0.01);
@@ -180,7 +197,8 @@ TEST_SUITE_END() // F16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE(F32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP32Dataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -188,15 +206,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<float>, framewo
template <typename T>
using NEElementwiseSquaredDiffBroadcastFixture = ElementwiseSquaredDiffBroadcastValidationFixture<Tensor, Accessor, NEElementwiseSquaredDiff, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseSquaredDiffBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapesBroadcast(),
- ElementwiseSquaredDiffFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseSquaredDiffBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapesBroadcast(),
+ ElementwiseSquaredDiffFP32Dataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEElementwiseSquaredDiffBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapesBroadcast(),
- ElementwiseSquaredDiffFP32Dataset))
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEElementwiseSquaredDiffBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapesBroadcast(),
+ ElementwiseSquaredDiffFP32Dataset),
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
diff --git a/tests/validation/NEON/FillBorder.cpp b/tests/validation/NEON/FillBorder.cpp
index 343ad831e4..928990b2b4 100644
--- a/tests/validation/NEON/FillBorder.cpp
+++ b/tests/validation/NEON/FillBorder.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,10 +60,10 @@ DATA_TEST_CASE(FillBorder, framework::DatasetMode::ALL, combine(combine(combine(
{
BorderSize border_size{ static_cast<unsigned int>(size) };
- std::mt19937 generator(library->seed());
- std::uniform_int_distribution<uint8_t> distribution_u8(0, 255);
- const uint8_t border_value = distribution_u8(generator);
- const uint8_t tensor_value = distribution_u8(generator);
+ std::mt19937 generator(library->seed());
+ std::uniform_int_distribution<uint32_t> distribution_u8(0, 255);
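+    // std::uniform_int_distribution is not defined for 8-bit integer types, so values are drawn as
+    // uint32_t and narrowed to uint8_t on assignment below.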
+ const uint8_t border_value = distribution_u8(generator);
+ const uint8_t tensor_value = distribution_u8(generator);
// Create tensors
Tensor src = create_tensor<Tensor>(shape, data_type);
@@ -77,7 +77,7 @@ DATA_TEST_CASE(FillBorder, framework::DatasetMode::ALL, combine(combine(combine(
validate(src.info()->padding(), padding);
// Fill tensor with constant value
- std::uniform_int_distribution<uint8_t> distribution{ tensor_value, tensor_value };
+ std::uniform_int_distribution<uint32_t> distribution{ tensor_value, tensor_value };
library->fill(Accessor(src), distribution, 0);
// Create and configure kernel
diff --git a/tests/validation/NEON/Floor.cpp b/tests/validation/NEON/Floor.cpp
index 419ce56e44..3cd1033ef9 100644
--- a/tests/validation/NEON/Floor.cpp
+++ b/tests/validation/NEON/Floor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,9 +22,12 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/StringUtils.h"
#include "arm_compute/runtime/NEON/functions/NEFloor.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuFloorKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/ShapeDatasets.h"
@@ -62,6 +65,30 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
const Status status = NEFloor::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false));
ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
}
+
+
+DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ })),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuFloorKernel::get_implementation(DataTypeISASelectorData{data_type, cpu_isa}, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_floor";
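+    // For example, this resolves to "neon_fp32_floor" for F32 and "neon_fp16_floor" for F16,
+    // assuming cpu_impl_dt() maps F32/F16 to "fp32"/"fp16".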
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
// clang-format on
// *INDENT-ON*
diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp
index 4bb48bf42c..ee7e56227d 100644
--- a/tests/validation/NEON/FullyConnectedLayer.cpp
+++ b/tests/validation/NEON/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,8 @@
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/helpers/MemoryHelpers.h"
+#include "src/cpu/operators/CpuFullyConnected.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/FullyConnectedLayerDataset.h"
@@ -40,6 +42,7 @@ namespace test
{
namespace validation
{
+using framework::dataset::make;
namespace
{
/** Tolerance for float operations */
@@ -56,7 +59,7 @@ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
/** CNN data types */
-const auto CNNDataTypes = framework::dataset::make("DataType",
+const auto CNNDataTypes = make("DataType",
{
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
DataType::F16,
@@ -64,18 +67,25 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
DataType::F32,
});
-const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true }));
+const auto FullyConnectedParameters = combine(make("TransposeWeights", { false, true }), make("ReshapeWeights", { false, true }));
-const auto QuantizationData = framework::dataset::make("QuantizationInfo",
+const auto QuantizationData = make("QuantizationInfo",
{
QuantizationInfo(1.f / 256.f, 10),
QuantizationInfo(1.1f, 10),
});
-const auto EmptyActivationFunctionDataset = framework::dataset::make("ActivationInfo",
+
+const auto IgnoredQuantizationData = make("IgnoredQuantizationInfo",
+{
+ QuantizationInfo(),
+});
+
+const auto NoActivationFunctionDataset = make("ActivationInfo",
{
ActivationLayerInfo(),
});
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+
+const auto ActivationFunctionsDataset = make("ActivationInfo",
{
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
@@ -83,7 +93,7 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
});
-const auto ActivationFunctionsQuantizedDataset = framework::dataset::make("ActivationInfo",
+const auto ActivationFunctionsQuantizedDataset = make("ActivationInfo",
{
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
@@ -94,40 +104,183 @@ const auto ActivationFunctionsQuantizedDataset = framework::dataset::make("Activ
TEST_SUITE(NEON)
TEST_SUITE(FullyConnectedLayer)
+/** Test case for memory injection in @ref cpu::CpuFullyConnected.
+ *
+ * Configure the operator once and inject memory at run-time in multiple executions.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
+{
+ auto fc = std::make_unique<cpu::CpuFullyConnected>();
+ const auto src_info = TensorInfo(TensorShape(8U), 1, DataType::F32, DataLayout::NHWC);
+ const auto weight_info = TensorInfo(TensorShape(8U, 4U), 1, DataType::F32, DataLayout::NHWC);
+ const auto bias_info = TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC);
+ auto dst_info = TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC);
+ const auto fc_info = FullyConnectedLayerInfo{};
+ fc->configure(&src_info, &weight_info, &bias_info, &dst_info, fc_info);
+
+    // The source, weight and bias tensors are created and allocated once; dst is created on every call of the lambda below.
+ auto src = create_tensor<Tensor>(src_info);
+ auto weight = create_tensor<Tensor>(weight_info);
+ auto bias = create_tensor<Tensor>(bias_info);
+ src.allocator()->allocate();
+ weight.allocator()->allocate();
+ bias.allocator()->allocate();
+
+ ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
+ ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
+
+ auto mg = MemoryGroup{};
+ auto ws = manage_workspace<Tensor>(fc->workspace(), mg, run_pack, prep_pack);
+
+ auto run_conv = [&]() -> Tensor
+ {
+ auto dst = create_tensor<Tensor>(dst_info);
+ dst.allocator()->allocate();
+ run_pack.add_tensor(TensorType::ACL_DST, &dst);
+
+ library->fill_tensor_value(Accessor(src), 1.f);
+ library->fill_tensor_value(Accessor(weight), 2.f);
+ library->fill_tensor_value(Accessor(bias), 3.f);
+ // This operator is configured once and captured by this lambda.
+ fc->prepare(prep_pack);
+ fc->run(run_pack);
+ return dst;
+ };
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
+}
+
+/** Test case for memory injection in @ref NEFullyConnectedLayer.
+ *
+ * Make sure @ref NEFullyConnectedLayer still works when its memory is allocated at configure time through the old API.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
+{
+ auto fc = std::make_unique<NEFullyConnectedLayer>();
+ const auto src_info = TensorInfo(TensorShape(8U), 1, DataType::F32, DataLayout::NHWC);
+ const auto weight_info = TensorInfo(TensorShape(8U, 4U), 1, DataType::F32, DataLayout::NHWC);
+ const auto bias_info = TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC);
+ auto dst_info = TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC);
+ const auto fc_info = FullyConnectedLayerInfo{};
+ auto run_conv = [&]()
+ {
+ auto src = create_tensor<Tensor>(src_info);
+ auto weight = create_tensor<Tensor>(weight_info);
+ auto bias = create_tensor<Tensor>(bias_info);
+ auto dst = create_tensor<Tensor>(dst_info);
+ fc->configure(&src, &weight, &bias, &dst, fc_info);
+ src.allocator()->allocate();
+ weight.allocator()->allocate();
+ bias.allocator()->allocate();
+ dst.allocator()->allocate();
+ library->fill_tensor_value(Accessor(src), 1.f);
+ library->fill_tensor_value(Accessor(weight), 2.f);
+ library->fill_tensor_value(Accessor(bias), 3.f);
+ fc->run();
+ return dst;
+ };
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
+}
+
+/** Unit test for @ref cpu::CpuFullyConnected with quantized multiplier > 1
+ *
+ * Tests output correctness.
+ */
+TEST_CASE(Quant8_Signed_Mult_gt_1, framework::DatasetMode::ALL)
+{
+ auto fc = std::make_unique<cpu::CpuFullyConnected>();
+ const auto src_info = TensorInfo(TensorShape(1U, 3U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, -1));
+ const auto weight_info = TensorInfo(TensorShape(1U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5, -8));
+ const auto bias_info = TensorInfo(TensorShape(1U), 1, DataType::S32);
+ auto dst_info = TensorInfo(TensorShape(1U, 3U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.1f, 0));
+ const auto fc_info = FullyConnectedLayerInfo{};
+ fc->configure(&src_info, &weight_info, &bias_info, &dst_info, fc_info);
+
+    // Create the tensors and allocate their memory
+ auto src = create_tensor<Tensor>(src_info);
+ auto weight = create_tensor<Tensor>(weight_info);
+ auto bias = create_tensor<Tensor>(bias_info);
+ auto dst = create_tensor<Tensor>(dst_info);
+ src.allocator()->allocate();
+ weight.allocator()->allocate();
+ bias.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias }, { TensorType::ACL_DST, &dst } };
+ ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
+
+ auto mg = MemoryGroup{};
+ auto ws = manage_workspace<Tensor>(fc->workspace(), mg, run_pack, prep_pack);
+
+ // Initialize input values
+ const std::vector<int8_t> src_values = { 3, 63, 31 };
+ const std::vector<int8_t> weight_values = { -4 };
+ const std::vector<int32_t> bias_values = { 16 };
+ const std::vector<int32_t> expected = { 80, 127, 127 };
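+    // Sanity-check derivation of the expected values, assuming the usual asymmetric
+    // quantization mapping real = scale * (q - offset):
+    //   src real values   : 0.5 * ({3, 63, 31} - (-1)) = {2, 32, 16}
+    //   weight real value : 0.5 * (-4 - (-8))          = 2
+    //   bias real value   : (0.5 * 0.5) * 16           = 4
+    //   FC output         : {2, 32, 16} * 2 + 4        = {8, 68, 36}
+    //   requantized with scale 0.1 and clamped to int8 -> {80, 127, 127}
+    // The effective multiplier 0.5 * 0.5 / 0.1 = 2.5 is greater than 1, which is the case exercised here.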
+ library->fill_static_values(Accessor(src), src_values);
+ library->fill_static_values(Accessor(weight), weight_values);
+ library->fill_static_values(Accessor(bias), bias_values);
+
+ // Run FC layer
+ fc->prepare(prep_pack);
+ fc->run(run_pack);
+
+ auto dst_ptr = reinterpret_cast<int8_t *>(dst.buffer());
+ for(size_t i = 0; i < dst.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(dst_ptr[i] == expected[i], framework::LogLevel::ERRORS);
+ }
+}
+
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Mismatching data types
+ make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Mismatching data types
TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Invalid weights dimensions
TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Wrongly reshaped weights
TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
}),
- framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16),
+ make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16),
TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
TensorInfo(TensorShape(217U, 315U), 1, DataType::F32),
TensorInfo(TensorShape(217U, 315U), 1, DataType::F32),
TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
})),
- framework::dataset::make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32),
+ make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32),
TensorInfo(TensorShape(192U), 1, DataType::F32),
TensorInfo(TensorShape(192U), 1, DataType::F32),
TensorInfo(TensorShape(271U), 1, DataType::F32),
TensorInfo(TensorShape(271U), 1, DataType::F32),
TensorInfo(TensorShape(192U), 1, DataType::F32),
})),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+ make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
})),
- framework::dataset::make("TransposeWeights",{ true, true, false, true, true, true })),
- framework::dataset::make("ReshapedWeights",{ false, false, false, false, false , false})),
- framework::dataset::make("Expected", { false, true, true, false, false, true })),
+ make("TransposeWeights",{ true, true, false, true, true, true })),
+ make("ReshapedWeights",{ false, false, false, false, false , false})),
+ make("Expected", { false, true, true, false, false, true })),
input_info, weights_info, bias_info, output_info, transpose_weights, reshaped_weights, expected)
{
// Create Fully Connected layer info
@@ -145,74 +298,89 @@ template <typename T>
using NEFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<Tensor, Accessor, NEFullyConnectedLayer, T>;
template <typename T>
using NEFullyConnectedLayerMixedDataLayoutFixture = FullyConnectedLayerValidationFixture<Tensor, Accessor, NEFullyConnectedLayer, T, true>;
+template <typename T>
+using NEFullyConnectedLayerDynamicWeightsFixture = FullyConnectedWithDynamicWeightsFixture<Tensor, Accessor, NEFullyConnectedLayer, T>;
+template <typename T>
+using NEFullyConnectedLayerDynamicBiasFixture = FullyConnectedWithDynamicBiasFixture<Tensor, Accessor, NEFullyConnectedLayer, T>;
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F16)),
- EmptyActivationFunctionDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ FullyConnectedParameters,
+ make("DataType", DataType::F16),
+ NoActivationFunctionDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(
+FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
combine(datasets::FullyConnectedLayerWithActivationDataset(),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F16)),
+ FullyConnectedParameters,
+ make("DataType", DataType::F16),
ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F16)),
- EmptyActivationFunctionDataset))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeFullyConnectedLayerDataset(),
+ FullyConnectedParameters,
+ make("DataType", DataType::F16),
+ NoActivationFunctionDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16);
}
+FIXTURE_DATA_TEST_CASE(RunDynamicWeights, NEFullyConnectedLayerDynamicWeightsFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::F16),
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)),
+ make("WeightsReshaped", { false, true })))
+{
+}
TEST_SUITE_END()
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)),
- EmptyActivationFunctionDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters,
+ make("DataType", DataType::F32),
+ NoActivationFunctionDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEFullyConnectedLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(9U, 5U, 7U)),
- framework::dataset::make("Weights", TensorShape(315U, 271U))),
- framework::dataset::make("Biases", TensorShape(271U))),
- framework::dataset::make("Output", TensorShape(271U))),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEFullyConnectedLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(
+ make("Input", TensorShape(9U, 5U, 7U)),
+ make("Weights", TensorShape(315U, 271U)),
+ make("Biases", TensorShape(271U)),
+ make("Output", TensorShape(271U)),
+ FullyConnectedParameters,
+ make("DataType", DataType::F32),
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(
- combine(datasets::FullyConnectedLayerWithActivationDataset(),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)),
+FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::FullyConnectedLayerWithActivationDataset(),
+ FullyConnectedParameters,
+ make("DataType", DataType::F32),
ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)),
- EmptyActivationFunctionDataset))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters,
+ make("DataType", DataType::F32),
+ NoActivationFunctionDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
}
+FIXTURE_DATA_TEST_CASE(RunDynamicWeights, NEFullyConnectedLayerDynamicWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::F32),
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)),
+ make("WeightsReshaped", { false, true })))
+{
+}
TEST_SUITE_END()
TEST_SUITE_END()
@@ -223,87 +391,152 @@ using NEFullyConnectedLayerQuantizedMixedDataLayoutFixture = FullyConnectedLayer
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(
- combine(datasets::SmallFullyConnectedLayerDataset(),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- QuantizationData),
- EmptyActivationFunctionDataset))
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, NEFullyConnectedLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(
+ make("Input", TensorShape(9U, 5U, 7U)),
+ make("Weights", TensorShape(315U, 271U)),
+ make("Biases", TensorShape(271U)),
+ make("Output", TensorShape(271U)),
+ FullyConnectedParameters,
+ make("DataType", DataType::QASYMM8),
+ QuantizationData,
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEFullyConnectedLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(9U, 5U, 7U)),
- framework::dataset::make("Weights", TensorShape(315U, 271U))),
- framework::dataset::make("Biases", TensorShape(271U))),
- framework::dataset::make("Output", TensorShape(271U))),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- QuantizationData),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
-{
- // Validate output
- validate(Accessor(_target), _reference, tolerance_qasymm8);
-}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
combine(datasets::FullyConnectedLayerWithActivationDataset(),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- QuantizationData),
+ FullyConnectedParameters,
+ make("DataType", DataType::QASYMM8),
+ QuantizationData,
ActivationFunctionsQuantizedDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
+FIXTURE_DATA_TEST_CASE(RunDynamicWeightsWithActivation, NEFullyConnectedLayerDynamicWeightsFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::QASYMM8),
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)),
+ make("WeightsReshaped", { false })))
+{
+}
+FIXTURE_DATA_TEST_CASE(RunDynamicBiasWithActivation, NEFullyConnectedLayerDynamicBiasFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::QASYMM8),
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
+{
+}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(
- combine(datasets::LargeFullyConnectedLayerDataset(),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- QuantizationData),
- EmptyActivationFunctionDataset))
+// Dynamic quantization tests
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallFullyConnectedLayerDataset(),
+ FullyConnectedParameters,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationData,
+ NoActivationFunctionDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-TEST_SUITE_END()
-TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(
- combine(datasets::SmallFullyConnectedLayerDataset(),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- QuantizationData),
- EmptyActivationFunctionDataset))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(
+ datasets::LargeFullyConnectedLayerDataset(),
+ FullyConnectedParameters,
+ framework::dataset::make("DataType", DataType::QASYMM8),
+ QuantizationData,
+ NoActivationFunctionDataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEFullyConnectedLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(9U, 5U, 7U)),
- framework::dataset::make("Weights", TensorShape(315U, 271U))),
- framework::dataset::make("Biases", TensorShape(271U))),
- framework::dataset::make("Output", TensorShape(271U))),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- QuantizationData),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
+FIXTURE_DATA_TEST_CASE(RunDynamicBias, NEFullyConnectedLayerDynamicBiasFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::QASYMM8),
+ NoActivationFunctionDataset))
+{
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEFullyConnectedLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(
+ make("Input", TensorShape(9U, 5U, 7U)),
+ make("Weights", TensorShape(315U, 271U)),
+ make("Biases", TensorShape(271U)),
+ make("Output", TensorShape(271U)),
+ FullyConnectedParameters,
+ make("DataType", DataType::QASYMM8),
+ IgnoredQuantizationData,
+ NoActivationFunctionDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(
+FIXTURE_DATA_TEST_CASE(RunDynamicWeights, NEFullyConnectedLayerDynamicWeightsFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::QASYMM8),
+ NoActivationFunctionDataset,
+ make("WeightsReshaped", { false })))
+{
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, NEFullyConnectedLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(
+ make("Input", TensorShape(9U, 5U, 7U)),
+ make("Weights", TensorShape(315U, 271U)),
+ make("Biases", TensorShape(271U)),
+ make("Output", TensorShape(271U)),
+ FullyConnectedParameters,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ QuantizationData,
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
combine(datasets::FullyConnectedLayerWithActivationDataset(),
- FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- QuantizationData),
+ FullyConnectedParameters,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ QuantizationData,
ActivationFunctionsQuantizedDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
+FIXTURE_DATA_TEST_CASE(RunDynamicWeightsWithActivation, NEFullyConnectedLayerDynamicWeightsFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)),
+ make("WeightsReshaped", { false })))
+{
+}
+
+// Dynamic Quantization tests
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(
+ datasets::SmallFullyConnectedLayerDataset(),
+ FullyConnectedParameters,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ IgnoredQuantizationData,
+ NoActivationFunctionDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEFullyConnectedLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(
+ make("Input", TensorShape(9U, 5U, 7U)),
+ make("Weights", TensorShape(315U, 271U)),
+ make("Biases", TensorShape(271U)),
+ make("Output", TensorShape(271U)),
+ FullyConnectedParameters,
+ make("DataType", DataType::QASYMM8_SIGNED),
+ QuantizationData,
+ NoActivationFunctionDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+FIXTURE_DATA_TEST_CASE(RunDynamicWeights, NEFullyConnectedLayerDynamicWeightsFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ NoActivationFunctionDataset,
+ make("WeightsReshaped", { false })))
+{
+}
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // FullyConnectedLayer
diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp
index 500c6029d5..5f6a402204 100644
--- a/tests/validation/NEON/GEMM.cpp
+++ b/tests/validation/NEON/GEMM.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,12 +22,15 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/StringUtils.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
-#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
-#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/helpers/MemoryHelpers.h"
+#include "src/cpu/kernels/CpuGemmInterleave4x4Kernel.h"
+#include "src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h"
+#include "src/cpu/kernels/CpuGemmTranspose1xWKernel.h"
+#include "src/cpu/operators/CpuGemm.h"
#include "tests/NEON/Accessor.h"
#include "tests/NEON/Helper.h"
#include "tests/PaddingCalculator.h"
@@ -48,6 +51,8 @@ namespace test
{
namespace validation
{
+using framework::dataset::make;
+
namespace
{
constexpr AbsoluteTolerance<float> tolerance_f(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 data types */
@@ -57,7 +62,7 @@ const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute
constexpr float tolerance_num = 0.07f; /**< Tolerance number for FP16 data types */
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
/** CNN data types */
-const auto CNNDataTypes = framework::dataset::make("DataType",
+const auto CNNDataTypes = make("DataType",
{
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
DataType::F16,
@@ -65,62 +70,210 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
DataType::F32,
});
-const auto data_interleave = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12);
-const auto data_transpose = framework::dataset::make("M", 8, 14) * framework::dataset::make("N", 7, 14);
+const auto data_interleave = make("M", 8, 12) * make("N", 8, 12);
+const auto data_transpose = make("M", 8, 14) * make("N", 7, 14);
/** Zero padding test */
template <typename FunctionType>
bool validate_zero_padding(unsigned int dim0_value, unsigned int dim1_value)
{
const TensorShape in_shape(dim0_value, dim1_value);
+ TensorInfo in(in_shape, 1, DataType::U32);
+ TensorInfo dst;
- // Create tensors
- Tensor in = create_tensor<Tensor>(in_shape, DataType::U32);
- Tensor dst;
-
- ARM_COMPUTE_EXPECT(in.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(in.is_resizable(), framework::LogLevel::ERRORS);
// Validate zero-padding
FunctionType func;
func.configure(&in, &dst);
- return in.info()->padding().empty();
+ return in.padding().empty();
}
/* Zero padding test for GEMM kernels */
bool validate_gemm_zero_padding(const TensorShape shape0, const TensorShape shape1)
{
// Create tensors
- Tensor in0 = create_tensor<Tensor>(shape0, DataType::F32);
- Tensor in1 = create_tensor<Tensor>(shape1, DataType::F32);
- Tensor dst;
+ TensorInfo in0(shape0, 1, DataType::F32);
+ TensorInfo in1(shape1, 1, DataType::F32);
+ TensorInfo dst;
// Validate zero-padding
- NEGEMMMatrixMultiplyKernel gemm;
+ cpu::kernels::CpuGemmMatrixMultiplyKernel gemm;
gemm.configure(&in0, &in1, &dst, 1.0, false);
- return in0.info()->padding().empty() && in1.info()->padding().empty() && dst.info()->padding().empty();
+ return in0.padding().empty() && in1.padding().empty() && dst.padding().empty();
}
} // namespace
TEST_SUITE(NEON)
TEST_SUITE(GEMM)
+/** Test case for memory injection in @ref cpu::CpuGemm.
+ *
+ * Configure the operator once and inject memory at run-time in multiple executions.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
+{
+ auto gemm = std::make_unique<cpu::CpuGemm>();
+ const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32);
+ const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
+ const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
+ auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
+ const auto gemm_info = GEMMInfo{};
+ gemm->configure(&lhs_info, &rhs_info, &c_info, &dst_info, 1.f, 1.f, gemm_info);
+
+    // The lhs, rhs and c tensors are created and allocated once; dst is created on every call of the lambda below.
+ auto lhs = create_tensor<Tensor>(lhs_info);
+ auto rhs = create_tensor<Tensor>(rhs_info);
+ auto c = create_tensor<Tensor>(c_info);
+ lhs.allocator()->allocate();
+ rhs.allocator()->allocate();
+ c.allocator()->allocate();
+
+ ITensorPack run_pack{ { TensorType::ACL_SRC_0, &lhs }, { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } };
+ ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } };
+
+ auto mg = MemoryGroup{};
+ auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack);
+
+ auto run_conv = [&]() -> Tensor
+ {
+ auto dst = create_tensor<Tensor>(dst_info);
+ dst.allocator()->allocate();
+ run_pack.add_tensor(TensorType::ACL_DST, &dst);
+
+ library->fill_tensor_value(Accessor(lhs), 1.f);
+ library->fill_tensor_value(Accessor(rhs), 2.f);
+ library->fill_tensor_value(Accessor(c), 3.f);
+ // This operator is configured once and captured by this lambda.
+ gemm->prepare(prep_pack);
+ gemm->run(run_pack);
+ return dst;
+ };
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
+}
+
+/** Test case for memory injection in @ref NEGEMM.
+ *
+ * Make sure @ref NEGEMM still works when its memory is allocated at configure time through the old API.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
+{
+ auto gemm = std::make_unique<NEGEMM>();
+ const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32);
+ const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
+ const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
+ auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
+ const auto gemm_info = GEMMInfo{};
+ auto run_conv = [&]()
+ {
+ auto lhs = create_tensor<Tensor>(lhs_info);
+ auto rhs = create_tensor<Tensor>(rhs_info);
+ auto c = create_tensor<Tensor>(c_info);
+ auto dst = create_tensor<Tensor>(dst_info);
+ gemm->configure(&lhs, &rhs, &c, &dst, 1.f, 1.f, gemm_info);
+ lhs.allocator()->allocate();
+ rhs.allocator()->allocate();
+ c.allocator()->allocate();
+ dst.allocator()->allocate();
+ library->fill_tensor_value(Accessor(lhs), 1.f);
+ library->fill_tensor_value(Accessor(rhs), 2.f);
+ library->fill_tensor_value(Accessor(c), 3.f);
+ gemm->run();
+ return dst;
+ };
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
+}
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+ make("LhsInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32),
+ }),
+ make("RhsInfo",{ TensorInfo(TensorShape(8U, 27U), 1, DataType::S32),
+ TensorInfo(TensorShape(8U, 27U), 1, DataType::F32),
+ })),
+ make("OutputInfo",{ TensorInfo(TensorShape(8U, 13U), 1, DataType::S32),
+ TensorInfo(TensorShape(8U, 13U), 1, DataType::F32),
+ })),
+ make("Expected", { false, true })),
+ lhs_info, rhs_info, output_info, expected)
+{
+ constexpr float alpha = 1.0;
+ constexpr float beta = 0.0;
+ const auto gemm_info = GEMMInfo();
+ bool is_valid = bool(NEGEMM::validate(&lhs_info.clone()->set_is_resizable(true), &rhs_info.clone()->set_is_resizable(true), nullptr, &output_info.clone()->set_is_resizable(true), alpha, beta, gemm_info));
+ ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE(KERNEL_SELECTION)
+DATA_TEST_CASE(KernelSelection_mul_and_add, framework::DatasetMode::ALL,
+ combine(make("CpuExt", std::string("NEON")),
+ make("DataType", { DataType::F32,
+ DataType::F16
+ })),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl_mul = CpuGemmMatrixMultiplyKernel::get_implementation(DataTypeISASelectorData{ data_type, cpu_isa }, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl_mul);
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_gemm_matrix_mul";
+ std::string actual = selected_impl_mul->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+
+ const auto *selected_impl_add = CpuGemmMatrixAdditionKernel::get_implementation(DataTypeISASelectorData{ data_type, cpu_isa }, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl_add);
+
+ expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_gemm_matrix_add";
+ actual = selected_impl_add->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // KERNEL_SELECTION
+
TEST_SUITE(TRANSPOSE_1XW)
-using NEGEMMTranspose1xW = NESynthetizeFunctionWithZeroConstantBorder<NEGEMMTranspose1xWKernel, 4>;
+using CpuGemmTranspose1xW = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmTranspose1xWKernel>;
DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(
- framework::dataset::make("N", { 1, 23, 63, 101 }),
- framework::dataset::make("K", { 1, 47, 29, 27 })),
+ make("N", { 1, 23, 63, 101 }),
+ make("K", { 1, 47, 29, 27 })),
n_value, k_value)
{
- bool status = validate_zero_padding<NEGEMMTranspose1xWKernel>(n_value, k_value);
+ bool status = validate_zero_padding<CpuGemmTranspose1xW>(n_value, k_value);
ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS);
}
TEST_SUITE(U32)
-using NEGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, NEGEMMTranspose1xW, uint32_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U32))
+using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint32_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * make("DataType", DataType::U32))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -128,8 +281,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMo
TEST_SUITE_END() // U32
TEST_SUITE(U16)
-using NEGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, NEGEMMTranspose1xW, uint16_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U16))
+using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint16_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * make("DataType", DataType::U16))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -137,8 +290,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMo
TEST_SUITE_END() // U16
TEST_SUITE(U8)
-using NEGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, NEGEMMTranspose1xW, uint8_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U8))
+using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint8_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * make("DataType", DataType::U8))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -148,20 +301,20 @@ TEST_SUITE_END() // U8
TEST_SUITE_END() // TRANSPOSE_1XW
TEST_SUITE(INTERLEAVE_4X4)
-using NEGEMMInterleave4x4 = NESynthetizeFunctionWithZeroConstantBorder<NEGEMMInterleave4x4Kernel, 4>;
+using CpuGemmInterleave4x4 = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmInterleave4x4Kernel>;
DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(
- framework::dataset::make("M", { 1, 23, 63, 101 }),
- framework::dataset::make("K", { 1, 47, 29, 27 })),
+ make("M", { 1, 23, 63, 101 }),
+ make("K", { 1, 47, 29, 27 })),
m_value, k_value)
{
- bool status = validate_zero_padding<NEGEMMInterleave4x4Kernel>(m_value, k_value);
+ bool status = validate_zero_padding<cpu::kernels::CpuGemmInterleave4x4Kernel>(m_value, k_value);
ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS);
}
TEST_SUITE(U32)
-using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, NEGEMMInterleave4x4, uint32_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::U32))
+using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint32_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * make("DataType", DataType::U32))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -169,8 +322,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetM
TEST_SUITE_END() // U32
TEST_SUITE(U16)
-using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, NEGEMMInterleave4x4, uint16_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::U16))
+using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint16_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * make("DataType", DataType::U16))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -178,8 +331,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetM
TEST_SUITE_END() // U16
TEST_SUITE(U8)
-using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, NEGEMMInterleave4x4, uint8_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::QASYMM8))
+using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint8_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * make("DataType", DataType::QASYMM8))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -192,17 +345,20 @@ template <typename T>
using NEGEMMFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T>;
template <typename T>
-using NEGEMMFixtureDisabledC = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T, true>;
+using NEBatchedMatMulFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T, true, false, false, false, false, true>;
+
+template <typename T>
+using NEGEMMAccumulateFixture = GEMMAccumulateValidationFixture<Tensor, Accessor, NEGEMM, T>;
TEST_SUITE(Float)
-DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(framework::dataset::make("In0", { TensorShape(21U, 13U),
+DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(make("In0", { TensorShape(21U, 13U),
TensorShape(31U, 1U),
TensorShape(31U, 1U),
TensorShape(8U, 2U),
TensorShape(38U, 12U),
TensorShape(32U, 1U)
}),
- framework::dataset::make("In1", { TensorShape(33U, 21U),
+ make("In1", { TensorShape(33U, 21U),
TensorShape(23U, 31U),
TensorShape(23U, 31U),
TensorShape(16U, 8U),
@@ -215,59 +371,111 @@ DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(framework::
ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS);
}
+DATA_TEST_CASE(ValidateAccumulate, framework::DatasetMode::ALL, combine(
+ zip(make("In0",{ TensorShape(21U, 13U) }),
+ make("In1", { TensorShape(33U, 21U) }),
+ make("Dst", { TensorShape(33U, 13U) })),
+ zip(
+ make("alpha", { 1.0, 100.0, 1.0, 1.0 }),
+ make("beta", { 0.0, 0.0, 1.0, 1.0 }),
+ make("is_c_null", { false, false, false, true }),
+ make("Expected", { true, false, false, true }))),
+ shape_a, shape_b, shape_dst, alpha, beta, is_c_null, expected)
+{
+ /* Accumulation test for GEMM kernels */
+ // Create tensors
+ TensorInfo in_a(shape_a, 1, DataType::F32);
+ TensorInfo in_b(shape_b, 1, DataType::F32);
+ TensorInfo in_c(shape_dst, 1, DataType::F32);
+ TensorInfo dst(shape_dst, 1, DataType::F32);
+
+ GEMMInfo gemm_info = GEMMInfo();
+ gemm_info.set_accumulate(true);
+
+ // Validate accumulation
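+    // Per the "Expected" dataset above, accumulation is only accepted when alpha == 1 and the
+    // C term does not contribute (beta == 0 or C is nullptr).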
+ cpu::CpuGemm gemm;
+ Status status = gemm.validate(&in_a, &in_b, (is_c_null ? nullptr : &in_c), &dst, alpha, beta, gemm_info);
+ ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS);
+}
+
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
- framework::dataset::make("ReshapeWeights", { true, false })),
- framework::dataset::make("DataType", DataType::F16)))
+ make("ReshapeWeights", { true, false })),
+ make("DataType", DataType::F16)))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(),
- framework::dataset::make("ReshapeWeights", { true, false })),
+ make("ReshapeWeights", { true, false })),
+ make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
+}
- framework::dataset::make("DataType", DataType::F16)))
+TEST_SUITE(BATCHED_MATMUL)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(),
+ make("ReshapeWeights", { false })),
+ make("DataType", DataType::F16)))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
}
-TEST_SUITE_END()
+TEST_SUITE_END() // BATCHED_MATMUL
+
+TEST_SUITE_END() // FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
- framework::dataset::make("ReshapeWeights", { true, false })),
-
- framework::dataset::make("DataType", DataType::F32)))
+ make("ReshapeWeights", { true, false })),
+ make("DataType", DataType::F32)))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(),
- framework::dataset::make("ReshapeWeights", { true, false })),
+ make("ReshapeWeights", { true, false })),
+ make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f);
+}
- framework::dataset::make("DataType", DataType::F32)))
+TEST_SUITE(BATCHED_MATMUL)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(),
+ make("ReshapeWeights", { false })),
+ make("DataType", DataType::F32)))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f);
}
-TEST_SUITE(DisabledC)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixtureDisabledC<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
- framework::dataset::make("ReshapeWeights", { true, false })),
+TEST_SUITE_END() // BATCHED_MATMUL
- framework::dataset::make("DataType", DataType::F32)))
+TEST_SUITE(ACCUMULATE)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMAccumulateFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallAccumulateGEMMDataset(),
+ make("ReshapeWeights", { false }),
+ make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMAccumulateFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeAccumulateGEMMDataset(),
+ make("ReshapeWeights", { false }),
+ make("DataType", DataType::F32)))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f);
}
-TEST_SUITE_END()
+TEST_SUITE_END() // ACCUMULATE
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // FP32
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // GEMM
+TEST_SUITE_END() // NEON
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp
index 9d075e12c1..d25f43a330 100644
--- a/tests/validation/NEON/GEMMLowp.cpp
+++ b/tests/validation/NEON/GEMMLowp.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,8 @@
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/helpers/MemoryHelpers.h"
+#include "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.h"
#include "tests/NEON/Accessor.h"
#include "tests/NEON/Helper.h"
#include "tests/PaddingCalculator.h"
@@ -45,10 +47,26 @@ namespace test
{
namespace validation
{
+using framework::dataset::make;
+
+namespace
+{
+ constexpr AbsoluteTolerance<float> tolerance_batched(1);
+ constexpr AbsoluteTolerance<float> tolerance_quant(1);
+} // namespace
+
+
TEST_SUITE(NEON)
TEST_SUITE(GEMMLowp)
TEST_SUITE(MatrixMultiplyCore)
+
using NEGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
+using NEGEMMLowpMatrixMultiplyCoreAccumulateFixture = GEMMLowpMatrixMultiplyAccumulateValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
+using NEGEMMLowpBatchedMatMulFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, true>;
+using NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture = GEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
+using NEGEMMLowpDequantizedMatrixMultiplyValidationFixture = GEMMLowpDequantizedMatrixMultiplyValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
+
+using framework::dataset::make;
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()),
shape_a, shape_b, shape_c, a_offset, b_offset)
@@ -74,29 +92,69 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::c
validate(b.info()->padding(), PaddingSize());
validate(c.info()->padding(), PaddingSize());
}
+// Accumulation is not supported for Int8/UInt8 on AArch32
+#ifdef __aarch64__
+DATA_TEST_CASE(ValidateAccumulate, framework::DatasetMode::ALL, combine(
+ zip(
+ make("In0",{ TensorShape(21U, 1U) }),
+ make("In1", { TensorShape(1U, 21U) }),
+ make("Dst", { TensorShape(1U, 1U) }),
+ make("a_offset", { -2 }),
+                                    make("b_offset", { 13 })
+ ),
+ zip(
+ make("OutputDataType", { DataType::S32, DataType::QASYMM8, DataType::QASYMM8_SIGNED}),
+ make("Expected", { true, false, false })
+ )),
+ shape_a, shape_b, shape_dst, a_offset, b_offset, output_data_type, expected)
+{
+ DataType input_data_type = (output_data_type == DataType::S32 ? DataType::QASYMM8 : output_data_type);
+ // Accumulation test for GEMM kernels
+ TensorInfo a(shape_a, 1, input_data_type, QuantizationInfo(1.0f / 255, a_offset));
+ TensorInfo b(shape_b, 1, input_data_type, QuantizationInfo(1.0f / 255, b_offset));
+ TensorInfo dst(shape_dst, 1, output_data_type, QuantizationInfo());
+
+ // Create and configure function
+ GEMMInfo gemm_info = GEMMInfo();
+ gemm_info.set_accumulate(true);
+
+ if (is_data_type_quantized(output_data_type))
+ {
+ GEMMLowpOutputStageInfo gemmLowpOutputStageInfo = GEMMLowpOutputStageInfo();
+ gemmLowpOutputStageInfo.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+
+ gemm_info.set_gemmlowp_output_stage(gemmLowpOutputStageInfo);
+ }
+
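+    // Per the "Expected" dataset above, accumulation is only accepted for a raw S32 output;
+    // requantized QASYMM8 / QASYMM8_SIGNED outputs (via an output stage) are rejected.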
+ cpu::CpuGemmLowpMatrixMultiplyCore gemmlowp_mm;
+ Status status = gemmlowp_mm.validate(&a, &b, nullptr, &dst, gemm_info);
+
+ ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS);
+}
+#endif // __aarch64__
// *INDENT-OFF*
// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+ make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4
TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Mismatching data type
TensorInfo(TensorShape(20U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)),
}),
- framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
+ make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
- })),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
+ }),
+ make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
TensorInfo(TensorShape(8U, 11U), 1, DataType::S32),
TensorInfo(TensorShape(64U, 32U), 1, DataType::S32),
- })),
- framework::dataset::make("Expected", { true, false, false, false, true })),
+ }),
+ make("Expected", { true, false, false, false, true })),
a_info, b_info, output_info, expected)
{
// Lock tensors
@@ -109,444 +167,224 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
// clang-format on
// *INDENT-ON*
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
+/** Test case for memory injection in @ref cpu::CpuGemmLowpMatrixMultiplyCore.
+ *
+ * Configure the operator once and inject memory at run-time in multiple executions.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
{
- // Validate output
- validate(Accessor(_target), _reference);
+ auto gemm = std::make_unique<cpu::CpuGemmLowpMatrixMultiplyCore>();
+ auto a_info = TensorInfo(TensorShape(32U, 72U), 1, DataType::QASYMM8);
+ auto b_info = TensorInfo(TensorShape(17U, 32U), 1, DataType::QASYMM8);
+ auto dst_info = TensorInfo(TensorShape(17U, 72U), 1, DataType::S32);
+ a_info.set_quantization_info(QuantizationInfo(1.0f / 255, -9));
+ b_info.set_quantization_info(QuantizationInfo(1.0f / 255, 1));
+ const auto gemm_info = GEMMInfo{};
+ gemm->configure(&a_info, &b_info, nullptr, &dst_info, gemm_info);
+
+    // Create and allocate the tensors that will be injected into the operator at run time
+ auto a = create_tensor<Tensor>(a_info);
+ auto b = create_tensor<Tensor>(b_info);
+ auto dst = create_tensor<Tensor>(dst_info);
+ a.allocator()->allocate();
+ b.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ITensorPack run_pack =
+ {
+ { TensorType::ACL_SRC_0, &a },
+ { TensorType::ACL_SRC_1, &b },
+ { TensorType::ACL_DST, &dst }
+ };
+ ITensorPack prep_pack =
+ {
+ { TensorType::ACL_SRC_1, &b },
+ };
+
+ auto mg = MemoryGroup{};
+ auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack);
+
+ auto run_conv = [&]() -> Tensor
+ {
+ auto dst = create_tensor<Tensor>(dst_info);
+ dst.allocator()->allocate();
+ run_pack.add_tensor(TensorType::ACL_DST, &dst);
+
+ library->fill_tensor_value(Accessor(a), static_cast<uint8_t>(1));
+ library->fill_tensor_value(Accessor(b), static_cast<uint8_t>(2));
+ // This operator is configured once and captured by this lambda.
+ gemm->prepare(prep_pack);
+ gemm->run(run_pack);
+ return dst;
+ };
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((uint8_t *)result_0.buffer())[i] == ((uint8_t *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset())
+/** Test case for memory injection in @ref NEGEMMLowpMatrixMultiplyCore.
+ *
+ * Make sure @ref NEGEMMLowpMatrixMultiplyCore still works when the memory is injected at configure time using the old API.
+ *
+ * Checks performed in order:
+ * - Both runs compute the same output
+ */
+TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
{
- // Validate output
- validate(Accessor(_target), _reference);
+ auto gemm = std::make_unique<NEGEMMLowpMatrixMultiplyCore>();
+ auto a_info = TensorInfo(TensorShape(32U, 72U), 1, DataType::QASYMM8);
+ auto b_info = TensorInfo(TensorShape(17U, 32U), 1, DataType::QASYMM8);
+ auto dst_info = TensorInfo(TensorShape(17U, 72U), 1, DataType::S32);
+ a_info.set_quantization_info(QuantizationInfo(1.0f / 255, -9));
+ b_info.set_quantization_info(QuantizationInfo(1.0f / 255, 1));
+ const auto gemm_info = GEMMInfo{};
+ auto run_conv = [&]()
+ {
+ auto a = create_tensor<Tensor>(a_info);
+ auto b = create_tensor<Tensor>(b_info);
+ auto dst = create_tensor<Tensor>(dst_info);
+ gemm->configure(&a, &b, nullptr, &dst, gemm_info);
+ a.allocator()->allocate();
+ b.allocator()->allocate();
+ dst.allocator()->allocate();
+ library->fill_tensor_value(Accessor(a), static_cast<uint8_t>(1));
+ library->fill_tensor_value(Accessor(b), static_cast<uint8_t>(2));
+ gemm->run();
+ return dst;
+ };
+ auto result_0 = run_conv();
+ auto result_1 = run_conv();
+ for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+ {
+ ARM_COMPUTE_EXPECT(((uint8_t *)result_0.buffer())[i] == ((uint8_t *)result_1.buffer())[i], framework::LogLevel::ERRORS);
+ }
}
-using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
-TEST_SUITE(FusedOffsetOutput)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
- framework::dataset::make("DataType", { DataType::QASYMM8 })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
{
// Validate output
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
- framework::dataset::make("DataType", { DataType::QASYMM8 })))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset())
{
// Validate output
validate(Accessor(_target), _reference);
}
-TEST_SUITE_END() // FusedOffsetOutput
-TEST_SUITE_END() // MatrixMultiplyCore
-
-TEST_SUITE(OutputStage)
-
-TEST_SUITE(QuantizeDownInt32Scale)
+TEST_SUITE(BatchedMatMul)
TEST_SUITE(QASYMM8)
-
-const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
- 3)
- * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
- 2)
- * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 174) * framework::dataset::make("addBias", { false, true });
-
-using NEGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture<Tensor, Accessor, NEGEMMLowpOutputStage>;
-
-// *INDENT-OFF*
-// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
- TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
- }),
- framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
- TensorInfo(TensorShape(20U), 1, DataType::S32),
- })),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
- TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
- })),
- framework::dataset::make("Min",{ 0,
- 13,
- })),
- framework::dataset::make("Max",{ 205,
- 180,
- })),
- framework::dataset::make("Expected", { true, false })),
- a_info, b_info, output_info, min, max, expected)
-{
-
- GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo();
- output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN;
- output_stage.gemmlowp_min_bound = min;
- output_stage.gemmlowp_max_bound = max;
- output_stage.output_data_type = DataType::QASYMM8;
-
- // Lock tensors
- Status status = NEGEMMLowpOutputStage::validate(&a_info.clone()->set_is_resizable(false),
- &b_info.clone()->set_is_resizable(false),
- &output_info.clone()->set_is_resizable(false),
- output_stage);
- ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
-}
-// clang-format on
-// *INDENT-ON*
-
-TEST_CASE(NoPaddingAdded, framework::DatasetMode::PRECOMMIT)
+using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned =
+ GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL,
+ combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(),
+ make("DataType", { DataType::QASYMM8 }),
+ make("reshape_b_only_on_first_run", { false })))
{
- Tensor input1 = create_tensor<Tensor>(TensorShape(21U, 13U), DataType::S32);
- Tensor input2 = create_tensor<Tensor>(TensorShape(21U, 1U), DataType::S32);
- Tensor output = create_tensor<Tensor>(TensorShape(21U, 13U), DataType::QASYMM8);
-
- GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo();
- output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN;
- output_stage.gemmlowp_min_bound = 0;
- output_stage.gemmlowp_max_bound = 205;
- output_stage.output_data_type = DataType::QASYMM8;
-
- NEGEMMLowpOutputStage f;
- f.configure(&input1, &input2, &output, output_stage);
-
- // Validate padding is zero
- validate(input1.info()->padding(), PaddingSize());
- validate(input2.info()->padding(), PaddingSize());
- validate(output.info()->padding(), PaddingSize());
+ validate(Accessor(_target), _reference, tolerance_batched);
}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases))
-{
- // Validate output
- validate(Accessor(_target), _reference);
-}
-
-TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
-{
- // Validate output
- validate(Accessor(_target), _reference);
-}
-
-TEST_SUITE_END() // BoundedReLu
-
TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
-
-const auto quantize_down_int32_to_int8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
- 3)
- * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
- 2)
- * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", -100, -98) * framework::dataset::make("max", 71, 74) * framework::dataset::make("addBias", { false, true });
-
-using NEGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture<Tensor, Accessor, NEGEMMLowpOutputStage>;
-
-// *INDENT-OFF*
-// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
- TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
- TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
- }),
- framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
- TensorInfo(TensorShape(21U), 1, DataType::S32),
- TensorInfo(TensorShape(20U), 1, DataType::S32),
- })),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED),
- TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED),
- TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
- })),
- framework::dataset::make("Min",{ -10,
- -200,
- -113,
- })),
- framework::dataset::make("Max",{ 105,
- 300,
- -18,
- })),
- framework::dataset::make("Expected", { true, false, false })),
- a_info, b_info, output_info, min, max, expected)
+using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned =
+ GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL,
+ combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(),
+ make("DataType", { DataType::QASYMM8_SIGNED }),
+ make("reshape_b_only_on_first_run", { false })))
{
- GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo();
- output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN;
- output_stage.gemmlowp_min_bound = min;
- output_stage.gemmlowp_max_bound = max;
- output_stage.output_data_type = DataType::QASYMM8_SIGNED;
-
- // Lock tensors
- Status status = NEGEMMLowpOutputStage::validate(&a_info.clone()->set_is_resizable(false),
- &b_info.clone()->set_is_resizable(false),
- &output_info.clone()->set_is_resizable(false),
- output_stage);
- ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ validate(Accessor(_target), _reference, tolerance_batched);
}
-// clang-format on
-// *INDENT-ON*
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // BatchedMatMul
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases))
+TEST_SUITE(FusedOffsetOutput)
+using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL,
+ combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
+ make("DataType", { DataType::QASYMM8 }),
+ make("reshape_b_only_on_first_run", { false })))
{
// Validate output
- validate(Accessor(_target), _reference);
+ validate(Accessor(_target), _reference, tolerance_quant);
}
-
-TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
+ make("DataType", { DataType::QASYMM8 }),
+ make("reshape_b_only_on_first_run", { false })))
{
// Validate output
- validate(Accessor(_target), _reference);
+ validate(Accessor(_target), _reference, tolerance_quant);
}
+TEST_SUITE_END() // FusedOffsetOutput
-TEST_SUITE_END() // BoundedReLu
-
-TEST_SUITE_END() // QASYMM8_SIGNED
-
-TEST_SUITE_END() // QuantizeDownInt32Scale
-
-TEST_SUITE(QuantizeDownInt32ToUint8ScaleByFixedPoint)
-
-const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
- 2)
- * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
- 2)
- * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 174) * framework::dataset::make("addBias", { false, true });
-
-using NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture =
- GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint>;
-
-using NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture =
- GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint>;
-
-// *INDENT-OFF*
-// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
- TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
- }),
- framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
- TensorInfo(TensorShape(20U), 1, DataType::S32),
- })),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
- TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
- })),
- framework::dataset::make("Min",{ 0,
- 13,
- })),
- framework::dataset::make("Max",{ 205,
- 180,
- })),
- framework::dataset::make("Expected", { true, false })),
- a_info, b_info, output_info, min, max, expected)
-{
- // Lock tensors
- Status status = NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(&a_info.clone()->set_is_resizable(false),
- &b_info.clone()->set_is_resizable(false),
- &output_info.clone()->set_is_resizable(false),
- min,
- max);
- ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
-}
-// clang-format on
-// *INDENT-ON*
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_uint8_scale_by_fixedpoint_cases))
+// Accumulation is not supported for Int8/UInt8 in aarch32
+#ifdef __aarch64__
+TEST_SUITE(ACCUMULATION)
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreAccumulateFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
{
// Validate output
validate(Accessor(_target), _reference);
}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(),
- quantize_down_int32_to_uint8_scale_by_fixedpoint_cases))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreAccumulateFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset())
{
// Validate output
validate(Accessor(_target), _reference);
}
+TEST_SUITE_END() // S32
+TEST_SUITE_END() // ACCUMULATION
+#endif // __aarch64__
-TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases))
+TEST_SUITE(DynamicQuantization)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
{
// Validate output
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(),
- quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset())
{
// Validate output
validate(Accessor(_target), _reference);
}
-TEST_SUITE_END() // BoundedReLu
-
-TEST_SUITE_END() // QuantizeDownInt32ToUint8ScaleByFixedPoint
-
-TEST_SUITE(QuantizeDownInt32ToInt8ScaleByFixedPoint)
-
-const auto quantize_down_int32_to_int8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
- 2)
- * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128) * framework::dataset::make("max", 128) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
- 2)
- * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
-
-using NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture =
- GEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint>;
-
-// *INDENT-OFF*
-// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::F32), // Invalid input data type
- TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
- TensorInfo(TensorShape(21U, 13U), 1, DataType::S32),
- }),
- framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
- TensorInfo(TensorShape(20U), 1, DataType::S32),
- TensorInfo(TensorShape(21U), 1, DataType::S32),
- })),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED),
- TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
- TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED),
- })),
- framework::dataset::make("Min",{ -110,
- -113,
- -113,
- })),
- framework::dataset::make("Max",{ 87,
- 97,
- 97,
- })),
- framework::dataset::make("Expected", { false, false, true })),
- a_info, b_info, output_info, min, max, expected)
-{
- // Lock tensors
- Status status = NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::validate(&a_info.clone()->set_is_resizable(false),
- &b_info.clone()->set_is_resizable(false),
- &output_info.clone()->set_is_resizable(false),
- min,
- max);
- ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
-}
-// clang-format on
-// *INDENT-ON*
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_int8_scale_by_fixedpoint_cases))
+TEST_SUITE_END() // DynamicQuantization
+
+#ifdef __aarch64__
+// Dequant tests involve returning F32 from the MatrixMultiplyCore kernels and are only implemented in aarch64
+TEST_SUITE(Dequant)
+constexpr AbsoluteTolerance<float> tolerance_dequantized(0.01f);
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::ALL,
+ combine(
+ datasets::SmallGEMMLowpDataset(),
+ make("accumulate", {true, false})
+ ))
{
// Validate output
- validate(Accessor(_target), _reference);
+ validate(Accessor(_target), _reference, tolerance_dequantized);
}
-TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::NIGHTLY,
+ combine(
+ datasets::LargeGEMMLowpDataset(),
+ make("accumulate", {false})
+ ))
{
// Validate output
- validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END() // BoundedReLu
-TEST_SUITE_END() // QuantizeDownInt32ToInt8ScaleByFixedPoint
-
-TEST_SUITE(QuantizeDownInt32ToInt16ScaleByFixedPoint)
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
- 2)
- * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
- 2)
- * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823,
- 1073741825)
- * framework::dataset::make("result_shift", -3,
- -2)
- * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600,
- 254601602)
- * framework::dataset::make("result_shift", -3,
- -1)
- * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
-
-using NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture =
- GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint>;
-
-// *INDENT-OFF*
-// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
- TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
- }),
- framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
- TensorInfo(TensorShape(20U), 1, DataType::S32),
- })),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16),
- TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
- })),
- framework::dataset::make("Min",{ -205,
- -180,
- })),
- framework::dataset::make("Max",{ 205,
- 180,
- })),
- framework::dataset::make("Expected", { true, false })),
- a_info, b_info, output_info, min, max, expected)
-{
- // Lock tensors
- Status status = NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(&a_info.clone()->set_is_resizable(false),
- &b_info.clone()->set_is_resizable(false),
- &output_info.clone()->set_is_resizable(false),
- min,
- max);
- ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ validate(Accessor(_target), _reference, tolerance_dequantized);
}
-// clang-format on
-// *INDENT-ON*
+TEST_SUITE_END() // Dequant
+#endif // __aarch64__
-TEST_SUITE(NoRelu)
-TEST_SUITE(MultSmallerEq1)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_int16_scale_by_fixedpoint_cases))
-{
- // Validate output
- validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END() // MultSmallerEq1
-TEST_SUITE(MultGreater1)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases))
-{
- // Validate output
- validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END() // MultGreater1
-TEST_SUITE_END() // NoRelu
-TEST_SUITE(BoundedReLu)
-TEST_SUITE(MultSmallerEq1)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases))
-{
- // Validate output
- validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END() // MultSmallerEq1
-TEST_SUITE(MultGreater1)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
- quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases))
-{
- // Validate output
- validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END() // MultGreater1
-TEST_SUITE_END() // BoundedReLu
-TEST_SUITE_END() // QuantizeDownInt32ToInt16ScaleByFixedPoint
-TEST_SUITE_END() // OutputStage
+TEST_SUITE_END() // MatrixMultiplyCore
TEST_SUITE_END() // GEMMLowp
-TEST_SUITE_END() // Neon
+TEST_SUITE_END() // NEON
} // namespace validation
} // namespace test
} // namespace arm_compute
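For context, a minimal sketch of the stateless-operator workflow that the new MemoryInjection test above exercises: the operator is configured once against TensorInfo objects, and concrete tensors are injected through an ITensorPack at run time. This is an illustrative fragment, not part of the patch; the helper calls (create_tensor, manage_workspace) are the test-framework utilities already used in the test, and the wrapper function name is hypothetical.

// Sketch only: configure once on metadata, inject memory per run.
void run_gemmlowp_with_injected_memory()
{
    cpu::CpuGemmLowpMatrixMultiplyCore gemm;

    // Only metadata is needed at configure time; no buffers are allocated here.
    TensorInfo a_info(TensorShape(32U, 72U), 1, DataType::QASYMM8, QuantizationInfo(1.0f / 255, -9));
    TensorInfo b_info(TensorShape(17U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.0f / 255, 1));
    TensorInfo dst_info(TensorShape(17U, 72U), 1, DataType::S32);
    gemm.configure(&a_info, &b_info, nullptr, &dst_info, GEMMInfo{});

    // The caller owns the backing memory and injects it for each execution.
    auto a   = create_tensor<Tensor>(a_info);
    auto b   = create_tensor<Tensor>(b_info);
    auto dst = create_tensor<Tensor>(dst_info);
    a.allocator()->allocate();
    b.allocator()->allocate();
    dst.allocator()->allocate();

    ITensorPack run_pack  = { { TensorType::ACL_SRC_0, &a }, { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_DST, &dst } };
    ITensorPack prep_pack = { { TensorType::ACL_SRC_1, &b } };

    // Auxiliary (workspace) memory requested by the operator is also provided by the caller.
    MemoryGroup mg;
    auto workspace = manage_workspace<Tensor>(gemm.workspace(), mg, run_pack, prep_pack);

    gemm.prepare(prep_pack); // one-off work such as reshaping the B matrix
    gemm.run(run_pack);      // may be called repeatedly, with different injected buffers each time
}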
diff --git a/tests/validation/NEON/Gather.cpp b/tests/validation/NEON/Gather.cpp
index ca1e166bd1..0aea19939e 100644
--- a/tests/validation/NEON/Gather.cpp
+++ b/tests/validation/NEON/Gather.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -100,12 +100,14 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
template <typename T>
using NEGatherFixture = GatherFixture<Tensor, Accessor, NEGather, T>;
+const auto gather_small_shapes = arm_compute::test::framework::dataset::concat(datasets::SmallGatherDataset(), datasets::SmallGatherMultiDimIndicesDataset());
+
TEST_SUITE(Float)
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall,
NEGatherFixture<half>,
framework::DatasetMode::PRECOMMIT,
- combine(datasets::SmallGatherDataset(), framework::dataset::make("DataType", DataType::F16)))
+ combine(gather_small_shapes, framework::dataset::make("DataType", DataType::F16)))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -125,7 +127,7 @@ TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall,
NEGatherFixture<float>,
framework::DatasetMode::PRECOMMIT,
- combine(datasets::SmallGatherDataset(), framework::dataset::make("DataType", DataType::F32)))
+ combine(gather_small_shapes, framework::dataset::make("DataType", DataType::F32)))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -146,7 +148,7 @@ TEST_SUITE(U8)
FIXTURE_DATA_TEST_CASE(RunSmall,
NEGatherFixture<uint8_t>,
framework::DatasetMode::PRECOMMIT,
- combine(datasets::SmallGatherDataset(), framework::dataset::make("DataType", DataType::U8)))
+ combine(gather_small_shapes, framework::dataset::make("DataType", DataType::U8)))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -166,7 +168,7 @@ TEST_SUITE(U16)
FIXTURE_DATA_TEST_CASE(RunSmall,
NEGatherFixture<uint16_t>,
framework::DatasetMode::PRECOMMIT,
- combine(datasets::SmallGatherDataset(), framework::dataset::make("DataType", DataType::U16)))
+ combine(gather_small_shapes, framework::dataset::make("DataType", DataType::U16)))
{
// Validate output
validate(Accessor(_target), _reference);
diff --git a/tests/validation/NEON/Im2Col.cpp b/tests/validation/NEON/Im2Col.cpp
index 156957a601..ef5e75c5db 100644
--- a/tests/validation/NEON/Im2Col.cpp
+++ b/tests/validation/NEON/Im2Col.cpp
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
-#include "src/core/NEON/kernels/NEIm2ColKernel.h"
+#include "src/cpu/kernels/CpuIm2ColKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/NEON/Helper.h"
#include "tests/datasets/ShapeDatasets.h"
@@ -57,7 +57,7 @@ const auto conv_args_small = combine(combine(combine(combine(conv_filter
TEST_SUITE(NEON)
TEST_SUITE(Im2Col)
-using NEIm2Col = NESynthetizeFunction<NEIm2ColKernel>;
+using CpuIm2Col = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuIm2ColKernel>;
// *INDENT-OFF*
// clang-format off
@@ -78,26 +78,26 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
framework::dataset::make("Expected", { false, false, false, false, true })),
input_info, output_info, has_bias, expected)
{
- bool status = bool(NEIm2Col::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias));
+ bool status = bool(cpu::kernels::CpuIm2ColKernel::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias));
ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS);
}
// clang-format on
// *INDENT-ON*
template <typename T>
-using NEIm2ColFixture = Im2ColValidationFixture<Tensor, Accessor, NEIm2Col, T, false>;
+using CpuIm2ColFixture = Im2ColOpValidationFixture<Tensor, Accessor, CpuIm2Col, T, false>;
TEST_SUITE(Float)
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEIm2ColFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F32)),
- conv_args_small))
+FIXTURE_DATA_TEST_CASE(RunSmall, CpuIm2ColFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F32)),
+ conv_args_small))
{
// Validate output
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEIm2ColFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType",
- DataType::F32)),
- conv_args))
+FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType",
+ DataType::F32)),
+ conv_args))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -107,15 +107,15 @@ TEST_SUITE_END() // FP32
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEIm2ColFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F16)),
- conv_args_small))
+FIXTURE_DATA_TEST_CASE(RunSmall, CpuIm2ColFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F16)),
+ conv_args_small))
{
// Validate output
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEIm2ColFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType",
- DataType::F16)),
- conv_args))
+FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType",
+ DataType::F16)),
+ conv_args))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -127,15 +127,15 @@ TEST_SUITE_END() // FP16
TEST_SUITE_END() // Float
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEIm2ColFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::QASYMM8)),
- conv_args_small))
+FIXTURE_DATA_TEST_CASE(RunSmall, CpuIm2ColFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::QASYMM8)),
+ conv_args_small))
{
// Validate output
validate(Accessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEIm2ColFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- conv_args))
+FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ conv_args))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -165,8 +165,8 @@ TEST_CASE(PaddedChannelNHWC, framework::DatasetMode::PRECOMMIT)
Tensor dst_target = create_tensor<Tensor>(dst_shape, data_type, 1, qinfo);
// Configure target function
- NEIm2Col im2col_func;
- im2col_func.configure(&src_target, &dst_target, spatial_kernel, conv_info, has_bias);
+ CpuIm2Col im2col_func;
+ im2col_func.configure(src_target.info(), dst_target.info(), spatial_kernel, conv_info, has_bias);
// Extend padding
src_target.info()->extend_padding(PaddingSize(3, 5, 9, 1));
@@ -185,8 +185,13 @@ TEST_CASE(PaddedChannelNHWC, framework::DatasetMode::PRECOMMIT)
// Fill target source
library->fill_tensor_uniform(Accessor(src_target), 0);
+ ITensorPack pack =
+ {
+ { TensorType::ACL_SRC, &src_target },
+ { TensorType::ACL_DST, &dst_target }
+ };
// Run target function
- im2col_func.run();
+ im2col_func.run(pack);
// Calculate Reference
SimpleTensor<float> src_ref{ src_shape, data_type, 1, qinfo, data_layout };
diff --git a/tests/validation/NEON/LSTMLayerQuantized.cpp b/tests/validation/NEON/LSTMLayerQuantized.cpp
index d391267e3e..6b98ee2b67 100644
--- a/tests/validation/NEON/LSTMLayerQuantized.cpp
+++ b/tests/validation/NEON/LSTMLayerQuantized.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -64,11 +64,7 @@ inline void fill_tensor(SimpleTensor<T> &tensor, const std::vector<T> &v)
}
/** Tolerance for quantized asymmetric operations */
-#if defined(__aarch64__)
-constexpr AbsoluteTolerance<int16_t> tolerance_qsymm16(0);
-#else // defined(__aarch64__)
constexpr AbsoluteTolerance<int16_t> tolerance_qsymm16(1);
-#endif // defined(__aarch64__)
} // namespace
diff --git a/tests/validation/NEON/MatMul.cpp b/tests/validation/NEON/MatMul.cpp
new file mode 100644
index 0000000000..f22bd9e86a
--- /dev/null
+++ b/tests/validation/NEON/MatMul.cpp
@@ -0,0 +1,467 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEMatMul.h"
+
+#include "tests/datasets/LargeMatMulDataset.h"
+#include "tests/datasets/SmallMatMulDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/validation/fixtures/MatMulFixture.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using framework::dataset::make;
+
+TEST_SUITE(NEON)
+TEST_SUITE(MatMul)
+
+constexpr AbsoluteTolerance<float> tolerance_fp32(
+ 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 data types */
+const AbsoluteTolerance<half> tolerance_fp16(half(0.1f));
+#ifdef __aarch64__
+constexpr AbsoluteTolerance<int32_t> tolerance_qasymm8(1);
+constexpr AbsoluteTolerance<int32_t> tolerance_qasymm8_signed(1);
+#endif // __aarch64__
+
+// clang-format off
+// *INDENT-OFF*
+// Validation Tests
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL,
+ zip(
+ make("InputAInfo", {
+ TensorInfo(TensorShape(9U, 6U), 1, DataType::F32), // Mismatching datatype
+ TensorInfo(TensorShape(9U, 6U), 1, DataType::S32), // Unsupported datatypes
+ TensorInfo(TensorShape(9U, 6U, 2U), 1, DataType::F32), // Broadcasting in batch dimension not supported
+ TensorInfo(TensorShape(9U, 6U), 1, DataType::F32), // Invalid shape for multiplication
+ TensorInfo(TensorShape(9U, 6U), 1, DataType::F32),
+ TensorInfo(TensorShape(9U, 6U , 12U) , 1 , DataType::F32),
+ TensorInfo(TensorShape(9U, 6U , 12U) , 1 , DataType::F32), // Tensors are not dynamic
+ TensorInfo(TensorShape(9U, 6U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(9U, 6U), 1, DataType::QASYMM8_SIGNED),
+ TensorInfo(TensorShape(9U, 6U), 1, DataType::QASYMM8_SIGNED), // Mismatching data type
+ }),
+ make("InputBInfo", {
+ TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(5U, 9U), 1, DataType::S32),
+ TensorInfo(TensorShape(5U, 9U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 12U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 9U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 9U, 12U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 9U, 12U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8_SIGNED),
+ TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8_SIGNED),
+ }),
+ make("OutputInfo", {
+ TensorInfo(TensorShape(5U, 6U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 6U), 1, DataType::S32),
+ TensorInfo(TensorShape(5U, 6U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 6U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 6U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 6U, 12U) , 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 6U, 12U) , 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 6U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(5U, 6U), 1, DataType::QASYMM8_SIGNED),
+ TensorInfo(TensorShape(5U, 6U), 1, DataType::QASYMM8),
+ }),
+ make("TensorIsConst", {false, false, false, false, false , false, true, false, false, false}),
+ make("Expected", { false, false, false, false, true, true, false, true, true, false })),
+ a_info, b_info, output_info, are_tensors_const, expected)
+{
+ TensorInfo a{a_info};
+ TensorInfo b{b_info};
+ a.set_are_values_constant(are_tensors_const);
+ b.set_are_values_constant(are_tensors_const);
+ Status status = NEMatMul::validate(&a,
+ &b,
+ &output_info,
+ MatMulInfo(),
+ CpuMatMulSettings());
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// *INDENT-ON*
+// clang-format on
+
+// Generic Template
+template <typename T>
+using NEMatMulFixture = MatMulValidationWithActivationFixture<Tensor, Accessor, NEMatMul, CpuMatMulSettings, T>;
+
+// Fast math Template
+template <typename T>
+using NEMatMulFastMathFixture = MatMulGenericValidationFixture<Tensor, Accessor, NEMatMul, CpuMatMulSettings, T>;
+
+template <typename T>
+using NEMatMulFixedFormatFixture = MatMulFixedFormatFixture<Tensor, Accessor, NEMatMul, CpuMatMulSettings, T>;
+
+template <typename T>
+using NEMatMulDynamicTensorsFixture =
+ MatMulValidationWithDynamicTensorsFixture<Tensor, Accessor, NEMatMul, CpuMatMulSettings, T>;
+
+template <typename T>
+using NEQuantizedMatMulFixture = QuantizedMatMulValidationFixture<Tensor, Accessor, NEMatMul, CpuMatMulSettings, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ NEMatMulFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::F32),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ NEMatMulFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::F32),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32);
+}
+FIXTURE_DATA_TEST_CASE(RunHighDimensions,
+ NEMatMulFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(datasets::HighDimensionalMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::F32),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunStressDynamicTensors,
+ NEMatMulDynamicTensorsFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::F32),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+}),
+make("NumberOfRuns", 5)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END() // FP32
+
+#ifdef ARM_COMPUTE_ENABLE_BF16
+/* Note: MatMul BF16 is enabled by specifying FP32 datatype and enabling the fast math setting */
+constexpr AbsoluteTolerance<float> tolerance_bf16(0.02f);
+TEST_SUITE(BF16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ NEMatMulFastMathFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::F32),
+ make("ActivationInfo", {ActivationLayerInfo()}),
+ make("RunTimes", {0}),
+ make("Settings", {CpuMatMulSettings().fast_math(true)}),
+ make("LhsQInfo", {QuantizationInfo()}),
+ make("RhsQInfo", {QuantizationInfo()}),
+ make("OutQInfo", {QuantizationInfo()})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_bf16);
+}
+
+#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
+FIXTURE_DATA_TEST_CASE(RunTinyFixedFormat,
+ NEMatMulFixedFormatFixture<bfloat16>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::TinyMatMulDataset(),
+ make("TransposeA", {false}),
+ make("TransposeB", {false}),
+ make("DataType", DataType::BFLOAT16),
+ make("ActivationInfo", {ActivationLayerInfo()}),
+ make("RunTimes", {0}),
+ make("Settings", {CpuMatMulSettings().fast_math(true).fixed_format(true)}),
+ make("LhsQInfo", {QuantizationInfo()}),
+ make("RhsQInfo", {QuantizationInfo()}),
+ make("OutQInfo", {QuantizationInfo()})))
+{
+ if (CPUInfo::get().has_bf16())
+ {
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_bf16);
+ }
+}
+#endif /* ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS */
+
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ NEMatMulFastMathFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::F32),
+ make("ActivationInfo", {ActivationLayerInfo()}),
+ make("RunTimes", {0}),
+ make("Settings", {CpuMatMulSettings().fast_math(true)}),
+ make("LhsQInfo", {QuantizationInfo()}),
+ make("RhsQInfo", {QuantizationInfo()}),
+ make("OutQInfo", {QuantizationInfo()})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_bf16, 0.01 /* tolerance_num */);
+}
+TEST_SUITE_END() // BF16
+#endif /* ARM_COMPUTE_ENABLE_BF16 */
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ NEMatMulFixture<half>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::F16),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ NEMatMulFixture<half>,
+ framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::F16),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp16);
+}
+FIXTURE_DATA_TEST_CASE(RunStressDynamicTensors,
+ NEMatMulDynamicTensorsFixture<half>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::F16),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+}),
+make("NumberOfRuns", 5)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp16);
+}
+TEST_SUITE_END() // FP16
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+TEST_SUITE_END() // Float
+
+#ifdef __aarch64__ // All the GeMM CPU assembly kernels for integer datatypes require aarch64
+TEST_SUITE(Quantized)
+
+TEST_SUITE(QASYMM8)
+
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ NEQuantizedMatMulFixture<uint8_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::QASYMM8),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+}),
+make("NumberOfExtraRuns", {0, 1}),
+make("LhsQInfo", {QuantizationInfo(1.f / 50, 1)}),
+make("RhsQInfo", {QuantizationInfo(1.f / 30, -1)}),
+make("OutQInfo", {QuantizationInfo(1.f, 2)})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallExtraActivation,
+ NEQuantizedMatMulFixture<uint8_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(datasets::SmallerMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::QASYMM8),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
+}),
+make("NumberOfExtraRuns", {0, 1}),
+make("LhsQInfo", {QuantizationInfo(1.f / 50, 1)}),
+make("RhsQInfo", {QuantizationInfo(1.f / 30, -1)}),
+make("OutQInfo", {QuantizationInfo(1.f, 2)})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ NEQuantizedMatMulFixture<uint8_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::QASYMM8),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+}),
+make("NumberOfExtraRuns", {0, 1}),
+make("LhsQInfo", {QuantizationInfo(1.f / 100, 1)}),
+make("RhsQInfo", {QuantizationInfo(1.f / 200, -1)}),
+make("OutQInfo", {QuantizationInfo(1.f, 2)})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ NEQuantizedMatMulFixture<int8_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+}),
+make("NumberOfExtraRuns", {0, 1}),
+make("LhsQInfo", {QuantizationInfo(1.f / 40, -2)}),
+make("RhsQInfo", {QuantizationInfo(1.f / 50, 1)}),
+make("OutQInfo", {QuantizationInfo(1.f, 1)})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallExtraActivation,
+ NEQuantizedMatMulFixture<int8_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(datasets::SmallerMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
+}),
+make("NumberOfExtraRuns", {0, 1}),
+make("LhsQInfo", {QuantizationInfo(1.f / 40, -2)}),
+make("RhsQInfo", {QuantizationInfo(1.f / 50, 1)}),
+make("OutQInfo", {QuantizationInfo(1.f, 1)})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ NEQuantizedMatMulFixture<int8_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeMatMulDataset(),
+ make("TransposeA", {false, true}),
+ make("TransposeB", {false, true}),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+}),
+make("NumberOfExtraRuns", {0, 1}),
+make("LhsQInfo", {QuantizationInfo(1.f / 150, -2)}),
+make("RhsQInfo", {QuantizationInfo(1.f / 250, 1)}),
+make("OutQInfo", {QuantizationInfo(1.f, 1)})))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
+}
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE_END() // Quantized
+#endif // __aarch64__
+
+TEST_SUITE_END() // MatMul
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
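For context, a minimal sketch of how the validation entry point exercised by the new MatMul tests above is driven. This is an illustrative fragment, not part of the patch; the function name and the concrete shapes/quantization values are made up, while the calls themselves (NEMatMul::validate, MatMulInfo, CpuMatMulSettings, set_are_values_constant) are the ones used in the tests.

// Sketch only: static (shape/type) validation of a quantized matmul, mirroring the Validate dataset above.
bool is_qasymm8_matmul_supported()
{
    TensorInfo lhs(TensorShape(9U, 6U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 50, 1));
    TensorInfo rhs(TensorShape(5U, 9U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 30, -1));
    TensorInfo dst(TensorShape(5U, 6U), 1, DataType::QASYMM8, QuantizationInfo(1.f, 2));

    // Constant operands are rejected (see the "Tensors are not dynamic" case in the dataset above),
    // so mark both inputs as non-constant.
    lhs.set_are_values_constant(false);
    rhs.set_are_values_constant(false);

    const Status status = NEMatMul::validate(&lhs, &rhs, &dst, MatMulInfo(), CpuMatMulSettings());
    return bool(status); // true means the configuration is supported on this target
}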
diff --git a/tests/validation/NEON/MaxUnpoolingLayer.cpp b/tests/validation/NEON/MaxUnpoolingLayer.cpp
index 27f131fa51..0eb021fe71 100644
--- a/tests/validation/NEON/MaxUnpoolingLayer.cpp
+++ b/tests/validation/NEON/MaxUnpoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,10 +22,12 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/StringUtils.h"
#include "arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/cpu/kernels/CpuMaxUnpoolingLayerKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/framework/Asserts.h"
@@ -33,7 +35,6 @@
#include "tests/framework/datasets/Datasets.h"
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/MaxUnpoolingLayerFixture.h"
-
namespace arm_compute
{
namespace test
@@ -51,7 +52,7 @@ const auto PoolingLayerIndicesDatasetFPSmall = combine(combine(framework::datase
TEST_SUITE(Float)
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
+FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
framework::dataset::make("DataType", DataType::F32))),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })
@@ -63,7 +64,7 @@ FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<float>, framewor
TEST_SUITE_END() // FP32
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
+FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
framework::dataset::make("DataType", DataType::F16))),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })
@@ -74,7 +75,37 @@ FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<half>, framework
}
TEST_SUITE_END() // FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
TEST_SUITE_END() // Float
+
+TEST_SUITE(KernelSelection)
+
+DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED
+ })),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.sve = (cpu_ext == "SVE");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuMaxUnpoolingLayerKernel::get_implementation(DataTypeISASelectorData{ data_type, cpu_isa }, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_maxunpooling";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // KernelSelection
TEST_SUITE_END() // PoolingLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp b/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp
index 90d3d05a0d..085f3608a0 100644
--- a/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp
+++ b/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,7 +47,8 @@ namespace
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<half> tolerance_f16(half(0.2f));
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-RelativeTolerance<float> tolerance_f32(1e-8f);
+RelativeTolerance<float> tolerance_f32(1e-4f);
+RelativeTolerance<uint8_t> tolerance_qasymm8(1);
} // namespace
TEST_SUITE(NEON)
@@ -114,9 +115,23 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEMeanStdDevNormalizationLayerFixture<float>, f
// Validate output
validate(Accessor(_target), _reference, tolerance_f32);
}
+
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevNormalizationLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("InPlace", { false, true })),
+ framework::dataset::make("Epsilon", { 1e-7 })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // MeanStdNormalizationLayer
TEST_SUITE_END() // Neon
} // namespace validation
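
The new QASYMM8 run exercises the same per-row normalisation as the float paths, out = (x - mean(row)) / sqrt(var(row) + epsilon), with epsilon = 1e-7 here; the quantized path additionally rounds results back into the 8-bit representation, which is what the one-step tolerance_qasymm8 allows for on top of the float reference.
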
diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp
index 1bb0588919..964d1c5deb 100644
--- a/tests/validation/NEON/PixelWiseMultiplication.cpp
+++ b/tests/validation/NEON/PixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -56,6 +56,11 @@ const auto PixelWiseMultiplicationQASYMM8QuantDataset = combine(combine(
framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0) })),
framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 32768.f, 0) }));
+const auto PixelWiseMultiplicationQASYMM8QuantInPlaceDataset = combine(combine(
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 32768.f, 10) }),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(5.f / 32768.f, 10) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 10) }));
+
const auto PixelWiseMultiplicationPolicySTNUDataset = combine(
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE }),
framework::dataset::make("RoundingPolicy", { RoundingPolicy::TO_NEAREST_UP }));
@@ -75,7 +80,8 @@ const auto PixelWiseMultiplicationPolicySTZDataset = combine(
* expected to have either different quantization information, data type
* or different shape we are not testing in-place computation.
*/
-const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
#define DEFAULT_VALIDATE validate(Accessor(_target), _reference);
#define VALIDATE(TYPE, TOLERANCE) validate(Accessor(_target), _reference, AbsoluteTolerance<TYPE>(TOLERANCE), 0.f);
@@ -275,7 +281,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8SignedFixture,
framework::dataset::make("Scale", { scale_unity })),
PixelWiseMultiplicationPolicySTZDataset),
PixelWiseMultiplicationQASYMM8QuantDataset),
- InPlaceDataSet))
+ OutOfPlaceDataSet))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallInPlace, NEPixelWiseMultiplicationQASYMM8SignedFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataTypeIn1", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataTypeIn2", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataTypeOut", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("Scale", { scale_unity })),
+ PixelWiseMultiplicationPolicySTZDataset),
+ PixelWiseMultiplicationQASYMM8QuantInPlaceDataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -292,7 +310,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8Fixture, framew
framework::dataset::make("Scale", { scale_255 })),
PixelWiseMultiplicationPolicySTNUDataset),
PixelWiseMultiplicationQASYMM8QuantDataset),
- InPlaceDataSet))
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -306,7 +324,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8Fixture, framew
framework::dataset::make("Scale", { scale_unity })),
PixelWiseMultiplicationPolicySTZDataset),
PixelWiseMultiplicationQASYMM8QuantDataset),
- InPlaceDataSet))
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -320,7 +338,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8Fixture, framew
framework::dataset::make("Scale", { scale_other })),
PixelWiseMultiplicationPolicySTZDataset),
PixelWiseMultiplicationQASYMM8QuantDataset),
- InPlaceDataSet))
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -335,7 +353,20 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationBroadcastQASYMM8Fixtur
framework::dataset::make("Scale", { scale_other })),
PixelWiseMultiplicationPolicySTZDataset),
PixelWiseMultiplicationQASYMM8QuantDataset),
- framework::dataset::make("InPlace", { false })))
+ OutOfPlaceDataSet))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunTinyInPlace, NEPixelWiseMultiplicationBroadcastQASYMM8Fixture, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(),
+ framework::dataset::make("DataTypeIn1", DataType::QASYMM8)),
+ framework::dataset::make("DataTypeIn2", DataType::QASYMM8)),
+ framework::dataset::make("DataTypeOut", DataType::QASYMM8)),
+ framework::dataset::make("Scale", { scale_other })),
+ PixelWiseMultiplicationPolicySTZDataset),
+ PixelWiseMultiplicationQASYMM8QuantInPlaceDataset),
+ InPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -351,7 +382,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQSYMM16Fixture, framew
framework::dataset::make("Scale", { scale_255 })),
PixelWiseMultiplicationPolicySTNUDataset),
PixelWiseMultiplicationQSYMM16QuantDataset),
- InPlaceDataSet))
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qsymm16);
@@ -365,7 +396,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQSYMM16Fixture, framew
framework::dataset::make("Scale", { scale_unity })),
PixelWiseMultiplicationPolicySTZDataset),
PixelWiseMultiplicationQSYMM16QuantDataset),
- InPlaceDataSet))
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qsymm16);
@@ -379,7 +410,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQSYMM16Fixture, framew
framework::dataset::make("Scale", { scale_other })),
PixelWiseMultiplicationPolicySTZDataset),
PixelWiseMultiplicationQSYMM16QuantDataset),
- InPlaceDataSet))
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qsymm16);
@@ -394,7 +425,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQSYMM16ToS32Fixture, f
framework::dataset::make("Scale", { scale_unity })),
PixelWiseMultiplicationPolicySTZDataset),
PixelWiseMultiplicationQSYMM16QuantDataset),
- framework::dataset::make("InPlace", { false })))
+ OutOfPlaceDataSet))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -411,7 +442,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationU8U8ToS16Fixture, fram
framework::dataset::make("Scale", { scale_255 })),
datasets::ConvertPolicies()),
framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_UP)),
- framework::dataset::make("InPlace", { false })))
+ OutOfPlaceDataSet))
{
// Validate output
validate_wrap(Accessor(_target), _reference, AbsoluteTolerance<int16_t>(1), 0.f);
@@ -451,17 +482,17 @@ TEST_SUITE_END() // U8toU8
TEST_SUITE(U8toS16)
TEST_SUITE(Scale255)
-PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, ALL, SmallShapes(), U8, S16, S16, scale_255, TO_NEAREST_UP, framework::dataset::make("InPlace", { false }),
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, ALL, SmallShapes(), U8, S16, S16, scale_255, TO_NEAREST_UP, OutOfPlaceDataSet,
WRAP_VALIDATE(int16_t, 2))
TEST_SUITE_END() // Scale255
TEST_SUITE(ScaleUnity)
-PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, ALL, SmallShapes(), U8, S16, S16, scale_unity, TO_ZERO, framework::dataset::make("InPlace", { false }),
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, ALL, SmallShapes(), U8, S16, S16, scale_unity, TO_ZERO, OutOfPlaceDataSet,
DEFAULT_VALIDATE)
TEST_SUITE_END() // ScaleUnity
TEST_SUITE(ScaleOther)
-PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, ALL, SmallShapes(), U8, S16, S16, scale_other, TO_ZERO, framework::dataset::make("InPlace", { false }),
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, ALL, SmallShapes(), U8, S16, S16, scale_other, TO_ZERO, OutOfPlaceDataSet,
DEFAULT_VALIDATE)
TEST_SUITE_END() // ScaleOther
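
The switch from InPlaceDataSet to OutOfPlaceDataSet above follows the comment in the dataset section: when source and destination differ in quantization info, data type or shape, in-place computation is not a valid configuration, so those runs now test out-of-place execution only. The dedicated RunSmallInPlace and RunTinyInPlace cases instead draw from PixelWiseMultiplicationQASYMM8QuantInPlaceDataset, where all three tensors share one QuantizationInfo. A minimal sketch of what "in-place" means for this function (hypothetical setup, not the fixture's code; allocation and fill steps omitted):

    // src0 doubles as the destination, so its shape, data type and quantization
    // info must already match what the output would have.
    Tensor src0, src1;
    const QuantizationInfo qinfo(5.f / 32768.f, 10);
    src0.allocator()->init(TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8, qinfo));
    src1.allocator()->init(TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8, qinfo));

    NEPixelWiseMultiplication mul;
    mul.configure(&src0, &src1, &src0 /* dst == src0 */, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
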
diff --git a/tests/validation/NEON/Pooling3dLayer.cpp b/tests/validation/NEON/Pooling3dLayer.cpp
new file mode 100644
index 0000000000..07054462f5
--- /dev/null
+++ b/tests/validation/NEON/Pooling3dLayer.cpp
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEPooling3dLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/Pooling3dLayerDataset.h"
+#include "tests/datasets/PoolingTypesDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/Pooling3dLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Input data sets for floating-point data types */
+const auto Pooling3dLayerDatasetFP = combine(combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size3D(2, 3, 2) })),
+ framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 1, 1), Size3D(1, 2, 1), Size3D(2, 2, 1) })),
+ framework::dataset::make("Padding", { Padding3D(0, 1, 0), Padding3D(1, 1, 1) })),
+ framework::dataset::make("ExcludePadding", { true, false }));
+
+const auto Pooling3dLayerDatasetFPSmall = combine(combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size3D(2, 2, 2), Size3D(3, 3, 3) })),
+ framework::dataset::make("Stride", { Size3D(2, 2, 2), Size3D(2, 1, 1) })),
+ framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 1), Padding3D(1, 0, 0) })),
+ framework::dataset::make("ExcludePadding", { true, false }));
+
+const auto Pooling3dLayerDatasetQASYMM8Small = combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
+ framework::dataset::make("PoolingSize", { Size3D(3, 3, 3) })),
+ framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 1, 1), Size3D(1, 2, 1), Size3D(2, 2, 1) })),
+ framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 1), Padding3D(1, 0, 0) })),
+ framework::dataset::make("ExcludePadding", { true }));
+
+const auto Pooling3dLayerDatasetQASYMM8Large = combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
+ framework::dataset::make("PoolingSize", { Size3D(3, 3, 3) })),
+ framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 2, 1) })),
+ framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 0) })),
+ framework::dataset::make("ExcludePadding", { true }));
+
+using ShapeDataset = framework::dataset::ContainerDataset<std::vector<TensorShape>>;
+
+constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric type */
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_s(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric type */
+
+const auto qasymm8_in_qinfo_dataset = framework::dataset::make("InputQuantInfo", { QuantizationInfo(.2f, 10) });
+const auto qasymm8_out_qinfo_dataset = framework::dataset::make("OutputQuantInfo",
+{
+ QuantizationInfo(.2f, 10), // Same qinfo
+ QuantizationInfo(.1f, 5), // Multiplier <= 1
+ QuantizationInfo(2.f, 3) // Multiplier > 1
+});
+
+const auto qasymm8_signed_in_qinfo_dataset = framework::dataset::make("InputQuantInfo", { QuantizationInfo(.2f, -10) });
+const auto qasymm8_signed_out_qinfo_dataset = framework::dataset::make("OutputQuantInfo",
+{
+ QuantizationInfo(.2f, -10), // Same qinfo
+ QuantizationInfo(.1f, -5), // Multiplier <= 1
+ QuantizationInfo(2.f, -3) // Multiplier > 1
+});
+
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(Pooling3dLayer)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Mismatching data type
+ TensorInfo(TensorShape(2U, 27U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid pad/size combination
+ TensorInfo(TensorShape(2U, 27U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid pad/size combination
+ TensorInfo(TensorShape(2U, 27U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output shape
+ TensorInfo(TensorShape(5U, 13U, 15U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Global Pooling
+ TensorInfo(TensorShape(13U,13U, 5U, 1U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output Global Pooling
+ TensorInfo(TensorShape(5U, 13U, 13U, 4U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 13U, 13U, 4U, 4U), 1, DataType::F32, DataLayout::NDHWC), // Invalid data type
+ TensorInfo(TensorShape(5U, 13U, 13U, 4U, 4U), 1, DataType::F32, DataLayout::NHWC), // Invalid data layout
+ TensorInfo(TensorShape(5U, 13U, 13U, 5U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(1U, 16U, 1U, 3U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 13U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+ }),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(2U, 25U, 11U, 3U, 3U), 1, DataType::F16, DataLayout::NDHWC),
+ TensorInfo(TensorShape(2U, 30U, 11U, 3U, 2U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(2U, 25U, 16U, 3U, 2U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(2U, 27U, 13U, 3U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 1U, 1U, 1U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Global pooling applied
+ TensorInfo(TensorShape(5U, 2U, 2U, 2U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output Global Pooling
+ TensorInfo(TensorShape(5U, 12U, 12U, 3U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 12U, 12U, 3U, 4U), 1, DataType::QASYMM8, DataLayout::NDHWC), // Invalid data type
+ TensorInfo(TensorShape(5U, 12U, 12U, 3U, 4U), 1, DataType::F32, DataLayout::NDHWC), // Invalid data layout
+ TensorInfo(TensorShape(5U, 1U, 1U, 1U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(1U, 15U, 1U, 2U, 4U), 1, DataType::F32, DataLayout::NDHWC), // size larger than height
+ TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 6U, 6U, 2U, 2U), 1, DataType::F32, DataLayout::NDHWC),
+ TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+ })),
+ framework::dataset::make("PoolInfo", { Pooling3dLayerInfo(PoolingType::AVG, 3, Size3D(1, 1, 1), Padding3D(0, 0, 0)),
+ Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1, 1, 1), Padding3D(2, 0, 0)),
+ Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1, 1, 1), Padding3D(0, 0, 0)),
+ Pooling3dLayerInfo(PoolingType::L2, 3, Size3D(1, 1, 1), Padding3D(0, 0, 0)),
+ Pooling3dLayerInfo(PoolingType::AVG),
+ Pooling3dLayerInfo(PoolingType::MAX),
+ Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(), Padding3D(), false),
+ Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1U, 1U, 1U), Padding3D(), false),
+ Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1U, 1U, 1U), Padding3D(), false),
+ Pooling3dLayerInfo(PoolingType::AVG),
+ Pooling3dLayerInfo(PoolingType::MAX, 2, Size3D(1, 1, 2), Padding3D(0, 0, 0), false),
+ Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(2U, 2U, 2U), Padding3D(), false),
+ Pooling3dLayerInfo(PoolingType::AVG, 1, Size3D(2U, 2U, 2U), Padding3D(2, 2, 2), true), // pool size is equal to the padding size
+ Pooling3dLayerInfo(PoolingType::AVG, 1, Size3D(2U, 2U, 2U), Padding3D(2, 2, 2), false), // pool size is equal to the padding size
+ Pooling3dLayerInfo(PoolingType::AVG, 3, Size3D(2U, 2U, 2U), Padding3D(2,1,2,2,1,2), false, false, DimensionRoundingType::CEIL), // CEIL with asymmetric Padding
+ })),
+ framework::dataset::make("Expected", { false, false, false, false, true, false, false, false, false, true , false, true, false, false, false})),
+ input_info, output_info, pool_info, expected)
+{
+ bool is_valid = bool(NEPooling3dLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info));
+ ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
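+// Worked example of the shape arithmetic used above (assuming Padding3D() means zero padding and
+// the usual floor formula out = (in + pad_before + pad_after - pool) / stride + 1): a 13 x 13 x 4
+// (W x H x D) input pooled with a 2 x 2 x 2 window at stride (2, 2, 2) gives W = (13 - 2) / 2 + 1 = 6,
+// H = 6 and D = (4 - 2) / 2 + 1 = 2, which is how the TensorShape(5U, 6U, 6U, 2U, ...) output
+// entries are obtained; channels and batches are never pooled.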
+
+template <typename T>
+using NEPoolingLayer3dFixture = Pooling3dLayerValidationFixture<Tensor, Accessor, NEPooling3dLayer, T>;
+
+template <typename T>
+using NESpecial3dPoolingLayerFixture = SpecialPooling3dLayerValidationFixture<Tensor, Accessor, NEPooling3dLayer, T>;
+
+template <typename T>
+using NEPooling3dLayerGlobalFixture = Pooling3dLayerGlobalValidationFixture<Tensor, Accessor, NEPooling3dLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+
+FIXTURE_DATA_TEST_CASE(RunSpecial, NESpecial3dPoolingLayerFixture<float>, framework::DatasetMode::ALL, datasets::Pooling3dLayerDatasetSpecial() * framework::dataset::make("DataType", DataType::F32))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayer3dFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small5dShapes(), combine(Pooling3dLayerDatasetFPSmall,
+ framework::dataset::make("DataType", DataType::F32))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(datasets::Large5dShapes(), combine(Pooling3dLayerDatasetFPSmall, framework::dataset::make("DataType", DataType::F32))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE(GlobalPooling)
+// *INDENT-OFF*
+// clang-format off
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayer3dFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(
+ framework::dataset::make("InputShape", { TensorShape(3U, 27U, 13U, 4U),
+ TensorShape(4U, 27U, 13U, 4U, 2U)
+ }),
+ framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("PoolingSize", { Size3D(27, 13, 4) })),
+ framework::dataset::make("Strides", Size3D(1, 1, 1))),
+ framework::dataset::make("Paddings", Padding3D(0, 0, 0))),
+ framework::dataset::make("ExcludePadding", {false, true})),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
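+// The cases above emulate global pooling explicitly: PoolingSize equals the full W x H x D extent
+// of the input (27 x 13 x 4 for both shapes, reading NDHWC with the batch dimension omitted from
+// the 4D shape), with unit strides and zero padding, so the spatial dimensions collapse to
+// 1 x 1 x 1. RunGlobalSmall below exercises the same thing through the dedicated global-pooling
+// fixture, which presumably constructs Pooling3dLayerInfo from the pooling type alone, as in the
+// global-pooling entries of the Validate cases above.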
+
+FIXTURE_DATA_TEST_CASE(RunGlobalSmall, NEPooling3dLayerGlobalFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(
+ framework::dataset::make("InputShape", { TensorShape(27U, 13U, 4U, 3U),
+ TensorShape(27U, 13U, 4U, 4U, 2U)
+ }),
+ framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(
+ framework::dataset::make("InputShape", { TensorShape(4U, 79U, 37U, 11U),
+ TensorShape(4U, 79U, 37U, 11U, 2U)
+ }),
+ framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("PoolingSize", { Size3D(79, 37, 11) })),
+ framework::dataset::make("Strides", Size3D(1, 1, 1))),
+ framework::dataset::make("Paddings", Padding3D(0, 0, 0))),
+ framework::dataset::make("ExcludePadding", {false, true})),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE_END() // GlobalPooling
+TEST_SUITE_END() // FP32
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(FP16)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayer3dFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small5x5Shapes(), combine(Pooling3dLayerDatasetFPSmall,
+ framework::dataset::make("DataType", DataType::F16))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f16);
+}
+
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Large5dShapes(), combine(Pooling3dLayerDatasetFP,
+ framework::dataset::make("DataType",
+ DataType::F16))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f16);
+}
+
+TEST_SUITE(GlobalPooling)
+// *INDENT-OFF*
+// clang-format off
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayer3dFixture<half>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(
+ framework::dataset::make("InputShape", { TensorShape(3U, 27U, 13U, 4U),
+ TensorShape(4U, 27U, 13U, 4U, 2U)
+ }),
+ framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("PoolingSize", { Size3D(27, 13, 4) })),
+ framework::dataset::make("Strides", Size3D(1, 1, 1))),
+ framework::dataset::make("Paddings", Padding3D(0, 0, 0))),
+ framework::dataset::make("ExcludePadding", {false, true})),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f16);
+}
+
+
+FIXTURE_DATA_TEST_CASE(RunSmallGlobal, NEPooling3dLayerGlobalFixture<half>, framework::DatasetMode::ALL,
+ combine(combine(
+ framework::dataset::make("InputShape", { TensorShape(27U, 13U, 4U, 3U),
+ TensorShape(27U, 13U, 4U, 4U, 2U)
+ }),
+ framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(
+ framework::dataset::make("InputShape", { TensorShape(4U, 79U, 37U, 11U),
+ TensorShape(4U, 79U, 37U, 11U, 2U)
+ }),
+ framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("PoolingSize", { Size3D(79, 37, 11) })),
+ framework::dataset::make("Strides", Size3D(1, 1, 1))),
+ framework::dataset::make("Paddings", Padding3D(0, 0, 0))),
+ framework::dataset::make("ExcludePadding", false)),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f16);
+}
+
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE_END() // GlobalPooling
+TEST_SUITE_END() // FP16
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+TEST_SUITE_END() // Float
+TEST_SUITE(Quantized)
+
+template <typename T>
+using NEPooling3dLayerQuantizedFixture = Pooling3dLayerValidationQuantizedFixture<Tensor, Accessor, NEPooling3dLayer, T>;
+
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPooling3dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small5dShapes(),
+ combine(Pooling3dLayerDatasetQASYMM8Small,
+ framework::dataset::make("DataType", DataType::QASYMM8))),
+ qasymm8_in_qinfo_dataset),
+ qasymm8_out_qinfo_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPooling3dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large5dShapes(),
+ combine(Pooling3dLayerDatasetQASYMM8Large,
+ framework::dataset::make("DataType", DataType::QASYMM8))),
+ qasymm8_in_qinfo_dataset),
+ qasymm8_out_qinfo_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPooling3dLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small5dShapes(),
+ combine(Pooling3dLayerDatasetQASYMM8Small,
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
+ qasymm8_signed_in_qinfo_dataset),
+ qasymm8_signed_out_qinfo_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8_s);
+}
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
+TEST_SUITE_END() // Pooling3dLayer
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
index b70a18907f..161fe627cc 100644
--- a/tests/validation/NEON/PoolingLayer.cpp
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/PoolingLayerDataset.h"
@@ -81,6 +80,14 @@ const auto qasymm8_signed_out_qinfo_dataset = framework::dataset::make("OutputQu
QuantizationInfo(.1f, -5), // Multiplier <= 1
QuantizationInfo(2.f, -3) // Multiplier > 1
});
+
+// Cases where pooling region is completely outside the input tensor (excluding global pooling)
+const auto pool_outside_input_dataset = zip(zip(zip(zip(
+ framework::dataset::make("Shape", { TensorShape{ 2U, 2U, 1U }, TensorShape{ 2U, 2U, 4U }, TensorShape{ 3U, 5U, 2U }, TensorShape{ 10U, 20U, 3U } }),
+ framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+ framework::dataset::make("PoolingSize", { Size2D{ 2, 2 }, Size2D{ 3, 3 }, Size2D{ 2, 2 }, Size2D{ 3, 6 } })),
+ framework::dataset::make("PadStride", { PadStrideInfo{ 1, 1, 2, 2 }, PadStrideInfo{ 1, 1, 4, 4 }, PadStrideInfo{ 1, 1, 3, 3 }, PadStrideInfo{ 1, 1, 2, 5 } })),
+ framework::dataset::make("ExcludePadding", { false, false, false, false }));
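+// For the first entry, for example: a 2 x 2 input with a 2 x 2 pool, stride 1 and padding 2 on each
+// side gives (2 + 2 + 2 - 2) / 1 + 1 = 5 outputs per spatial dimension (assuming the usual floor
+// formula), and the window at output position (4, 4) covers padded columns/rows 4..5, i.e. it lies
+// entirely in the padding and never overlaps the input.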
} // namespace
TEST_SUITE(NEON)
@@ -98,7 +105,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::QASYMM8), // Invalid exclude_padding = false with quantized type, no actual padding and NHWC
TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32),
TensorInfo(TensorShape(1U, 16U, 1U), 1, DataType::F32),
- }),
+                                                     TensorInfo(TensorShape(112U, 112U, 64U, 1U), 1, DataType::F32, DataLayout::NHWC), // Mismatching number of channels
+                                                     TensorInfo(TensorShape(112U, 112U, 64U, 1U), 1, DataType::F32, DataLayout::NHWC), // Mismatching width
+ }),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16),
TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32),
@@ -108,7 +117,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
TensorInfo(TensorShape(12U, 12U, 5U), 1, DataType::QASYMM8),
TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(1U, 15U, 1U), 1, DataType::F32),
- })),
+                                                       TensorInfo(TensorShape(56U, 56U, 64U, 1U), 1, DataType::F32, DataLayout::NHWC),
+                                                       TensorInfo(TensorShape(56U, 51U, 64U, 1U), 1, DataType::F32, DataLayout::NHWC),
+
+ })),
framework::dataset::make("PoolInfo", { PoolingLayerInfo(PoolingType::AVG, 3, DataLayout::NCHW, PadStrideInfo(1, 1, 0, 0)),
PoolingLayerInfo(PoolingType::AVG, 3, DataLayout::NCHW, PadStrideInfo(1, 1, 0, 0)),
PoolingLayerInfo(PoolingType::AVG, 2, DataLayout::NCHW, PadStrideInfo(1, 1, 2, 0)),
@@ -118,8 +130,11 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
PoolingLayerInfo(PoolingType::AVG, 2, DataLayout::NHWC, PadStrideInfo(), false),
PoolingLayerInfo(PoolingType::AVG, DataLayout::NCHW),
PoolingLayerInfo(PoolingType::MAX, 2, DataLayout::NHWC, PadStrideInfo(1, 1, 0, 0), false),
+                                                       PoolingLayerInfo(PoolingType::MAX, 3, DataLayout::NHWC, PadStrideInfo(2, 2, 1, 1)),
+                                                       PoolingLayerInfo(PoolingType::MAX, 3, DataLayout::NHWC, PadStrideInfo(2, 2, 1, 1)),
+
})),
- framework::dataset::make("Expected", { false, false, false, false, true, false, true, false, false})),
+ framework::dataset::make("Expected", { false, false, false, false, true, false, true, false, false, false, false})),
input_info, output_info, pool_info, expected)
{
bool is_valid = bool(NEPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info));
@@ -142,13 +157,26 @@ using NESpecialPoolingLayerFixture = SpecialPoolingLayerValidationFixture<Tensor
const auto PoolingLayerIndicesDatasetFPSmall = combine(combine(combine(framework::dataset::make("PoolType", { PoolingType::MAX }), framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0) })),
framework::dataset::make("ExcludePadding", { true, false }));
-
+const auto PoolingLayerKernelIndicesDatasetFPSmall = combine(combine(combine(framework::dataset::make("PoolType", { PoolingType::MAX }), framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(3, 3), Size2D(7, 7) })),
+ framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 1, 1, 1) })),
+ framework::dataset::make("ExcludePadding", { false }));
TEST_SUITE(Float)
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
- framework::dataset::make("DataType",
- DataType::F32))),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(),
+ combine(PoolingLayerIndicesDatasetFPSmall,
+ framework::dataset::make("DataType", DataType::F32))),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("UseKernelIndices", { false })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f32);
+ validate(Accessor(_target_indices), _ref_indices);
+}
+FIXTURE_DATA_TEST_CASE(RunKernelIndices, NEPoolingLayerIndicesFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallNoneUnitShapes(),
+ combine(PoolingLayerKernelIndicesDatasetFPSmall,
+ framework::dataset::make("DataType", DataType::F32))),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("UseKernelIndices", { true })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f32);
@@ -159,7 +187,7 @@ FIXTURE_DATA_TEST_CASE(RunSpecial, NESpecialPoolingLayerFixture<float>, framewor
// Validate output
validate(Accessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall,
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetFPSmall,
framework::dataset::make("DataType",
DataType::F32))),
pool_data_layout_dataset))
@@ -167,7 +195,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<float>, framework::Datase
// Validate output
validate(Accessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEPoolingLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEPoolingLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(),
combine(combine(combine(combine(datasets::PoolingTypes(),
framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
framework::dataset::make("PadStride", { PadStrideInfo(2, 1, 0, 0) })),
@@ -186,22 +214,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture<float>, framework::Datase
// Validate output
validate(Accessor(_target), _reference, tolerance_f32);
}
+TEST_SUITE(CornerCases)
+FIXTURE_DATA_TEST_CASE(PoolRegionCompletelyOutsideInput, NEPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(pool_outside_input_dataset,
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ pool_data_layout_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // CornerCases
TEST_SUITE_END() // FP32
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
- framework::dataset::make("DataType",
- DataType::F16))),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })
-
- ))
+FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(),
+ combine(PoolingLayerIndicesDatasetFPSmall,
+ framework::dataset::make("DataType",
+ DataType::F16))),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("UseKernelIndices", { false })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f16);
validate(Accessor(_target_indices), _ref_indices);
}
-FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall,
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetFPSmall,
framework::dataset::make("DataType", DataType::F16))),
pool_data_layout_dataset))
{
@@ -215,6 +253,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture<half>, framework::Dataset
// Validate output
validate(Accessor(_target), _reference, tolerance_f16);
}
+TEST_SUITE(CornerCases)
+FIXTURE_DATA_TEST_CASE(PoolRegionCompletelyOutsideInput, NEPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(pool_outside_input_dataset,
+ framework::dataset::make("DataType",
+ DataType::F16)),
+ pool_data_layout_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // CornerCases
TEST_SUITE_END() // FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE_END() // Float
@@ -227,7 +275,7 @@ template <typename T>
using NEPoolingLayerQuantizedMixedDataLayoutFixture = PoolingLayerValidationQuantizedFixture<Tensor, Accessor, NEPoolingLayer, T, true>;
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmallNCHW, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmallNCHW, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
combine(PoolingLayerDatasetQASYMM8Small,
framework::dataset::make("DataType", DataType::QASYMM8))),
framework::dataset::make("DataLayout", { DataLayout::NCHW })),
@@ -237,7 +285,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallNCHW, NEPoolingLayerQuantizedFixture<uint8_t>, fr
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
combine(PoolingLayerDatasetQASYMM8Small,
framework::dataset::make("DataType", DataType::QASYMM8))),
framework::dataset::make("DataLayout", { DataLayout::NHWC })),
@@ -247,7 +295,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<uint8_t>, framew
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEPoolingLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEPoolingLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })),
@@ -262,7 +310,7 @@ FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEPoolingLayerQuantizedMixedDataLayou
}
TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
combine(PoolingLayerDatasetQASYMM8Small,
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
@@ -272,7 +320,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<int8_t>, framewo
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8_s);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEPoolingLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEPoolingLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })),
diff --git a/tests/validation/NEON/QuantizationLayer.cpp b/tests/validation/NEON/QuantizationLayer.cpp
index aeee54c835..bab7490762 100644
--- a/tests/validation/NEON/QuantizationLayer.cpp
+++ b/tests/validation/NEON/QuantizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,6 +34,7 @@
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/QuantizationLayerFixture.h"
+
namespace arm_compute
{
namespace test
@@ -182,7 +183,16 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8GenFixture<uin
framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })),
framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, 15) })))
+ framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, 15), QuantizationInfo(0.5f, 25) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_u8);
+}
+FIXTURE_DATA_TEST_CASE(ConvertUint8toInt8, NEQuantizationLayerQASYMM8GenFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes,
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })),
+ framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(2.0f, -1) })),
+ framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, 127) })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_u8);
@@ -191,7 +201,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8_SIGNED, NEQuantizationLayerQASYMM8_SIGNED
framework::dataset::make("DataTypeIn", DataType::QASYMM8)),
framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })),
framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 10), QuantizationInfo(2.0f, -25) })),
- framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 15) })))
+ framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 15), QuantizationInfo(1.0f, 127) })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_s8);
@@ -211,7 +221,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8_SIGNED, NEQuantizationLayerQASYMM8_SIGNED
framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)),
framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })),
framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 10) })),
- framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, -5) })))
+ framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, -5), QuantizationInfo(1.0f, 43) })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_s8);
@@ -220,11 +230,21 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8GenFixture<int
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })),
framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(2.0f, 10), QuantizationInfo(2.0f, -25) })),
- framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 30) })))
+ framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 30), QuantizationInfo(2.0f, -128) })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_u8);
}
+FIXTURE_DATA_TEST_CASE(ConvertInt8toUint8, NEQuantizationLayerQASYMM8_SIGNEDGenFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes,
+ framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })),
+ framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 0) })),
+ framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, -128) })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_s8);
+}
+
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // Quantized
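
The two conversion cases added above exercise requantization between the 8-bit asymmetric types. Under the usual affine model real_value = scale * (q - offset), equal scales reduce the conversion to an offset shift: with input QuantizationInfo(2.0f, 127) and output QuantizationInfo(2.0f, -1), q_out = q_in - 127 + (-1) = q_in - 128, which maps the uint8 range [0, 255] exactly onto the int8 range [-128, 127]; the int8-to-uint8 case with offsets -128 and 0 is the inverse shift, q_out = q_in + 128.
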
diff --git a/tests/validation/NEON/RNNLayer.cpp b/tests/validation/NEON/RNNLayer.cpp
index 14d9a5d14e..979aa0f2c5 100644
--- a/tests/validation/NEON/RNNLayer.cpp
+++ b/tests/validation/NEON/RNNLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -139,7 +139,7 @@ TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NERNNLayerFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallRNNLayerDataset(), framework::dataset::make("DataType", DataType::F16)))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+ validate(Accessor(_target), _reference, tolerance_f16, 0.02f, abs_tolerance_f16);
}
TEST_SUITE_END() // FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
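
The only functional change here is the fourth argument to validate(): previously 0.f, it now tolerates a small share of out-of-tolerance FP16 outputs (0.02, read as a fraction of elements, assuming tolerance_number keeps its usual meaning of the maximum allowed ratio of mismatching values); the relative and absolute tolerances themselves are unchanged.
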
diff --git a/tests/validation/NEON/ReduceMean.cpp b/tests/validation/NEON/ReduceMean.cpp
index b4a3f0d399..8ca0bb53a7 100644
--- a/tests/validation/NEON/ReduceMean.cpp
+++ b/tests/validation/NEON/ReduceMean.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,10 +46,15 @@ constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value f
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
constexpr AbsoluteTolerance<float> tolerance_f16(0.03f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#ifdef __aarch64__
constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric quantized type */
+constexpr AbsoluteTolerance<int8_t> tolerance_s8(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric quantized type */
+#else // __aarch64__
+constexpr AbsoluteTolerance<uint8_t> tolerance_u8(2); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric quantized type */
constexpr AbsoluteTolerance<int8_t> tolerance_s8(2); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric quantized type */
+#endif // __aarch64__
-const auto axis_keep = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1, 0), Coordinates(1, 2), Coordinates(0, 2), Coordinates(1, 3), Coordinates(0, 1, 2, 3) }),
+const auto axis_keep = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1, 0), Coordinates(1, 2), Coordinates(0, 2), Coordinates(1, 3), Coordinates(2, 3), Coordinates(0, 1, 2, 3) }),
framework::dataset::make("KeepDims", { true }));
const auto axis_drop = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1), Coordinates(3) }), framework::dataset::make("KeepDims", { false }));
} // namespace
diff --git a/tests/validation/NEON/Remap.cpp b/tests/validation/NEON/Remap.cpp
deleted file mode 100644
index 3c02f8eece..0000000000
--- a/tests/validation/NEON/Remap.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NERemap.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/NEON/Accessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/RemapFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr AbsoluteTolerance<uint8_t> tolerance_value(0);
-constexpr float tolerance_number = 0.f;
-} // namespace
-
-TEST_SUITE(NEON)
-TEST_SUITE(Remap)
-
-template <typename T>
-using NERemapFixture = RemapValidationFixture<Tensor, Accessor, NERemap, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NERemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
- framework::dataset::make("DataType",
- DataType::U8)),
- framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
-{
- // Validate output
- validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, NERemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
- framework::dataset::make("DataType",
- DataType::U8)),
- framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
-{
- // Validate output
- validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/NEON/ReorderLayer.cpp b/tests/validation/NEON/ReorderLayer.cpp
new file mode 100644
index 0000000000..839ad0ac92
--- /dev/null
+++ b/tests/validation/NEON/ReorderLayer.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#if defined(__aarch64__)
+
+#include "arm_compute/runtime/NEON/functions/NEReorderLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/datasets/ReorderLayerDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ReorderFixture.h"
+#include "src/core/NEON/kernels/NEReorderKernel.h"
+#include "src/core/NEON/kernels/arm_gemm/utils.hpp"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using framework::dataset::make;
+
+TEST_SUITE(NEON)
+TEST_SUITE(ReorderLayer)
+
+template <typename T>
+using NEReorderLayerAlias = ReorderValidationFixture<Tensor, Accessor, NEReorderLayer, T>;
+
+TEST_SUITE(FP32)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+DATA_TEST_CASE(ValidateReorderOHWIo8, framework::DatasetMode::ALL, combine(
+ zip(
+ make("InShape",{ TensorShape(10U, 9U), TensorShape(234U, 301U) }),
+ make("OutShape", { TensorShape(10U, 16U), TensorShape(234U, 304U) })
+ ),
+ zip(
+ make("InputWeightFormat", {WeightFormat::OHWI}),
+ make("OutputWeightFormat", {WeightFormat::OHWIo8})
+ )),
+ input_shape, output_shape, input_wf, output_wf)
+{
+    if(Scheduler::get().cpu_info().has_sve())
+    {
+        arm_compute::NEReorderLayer reorder_layer;
+        int  vector_length        = arm_gemm::utils::get_vector_length<float>();
+        bool expected_bool_status = false;
+        if (vector_length == 8)
+        {
+            expected_bool_status = true;
+        }
+
+        TensorInfo input_tensor_info(input_shape, 1, DataType::F32);
+        TensorInfo output_tensor_info(output_shape, 1, DataType::F32);
+
+        Status status = reorder_layer.validate(&input_tensor_info, &output_tensor_info, input_wf, output_wf);
+
+        ARM_COMPUTE_EXPECT((expected_bool_status == bool(status)), framework::LogLevel::ERRORS);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunBlock8, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock8(), make("DataType", DataType::F32)))
+{
+ // Validate output
+ if (_hardware_supports)
+ {
+ validate(Accessor(_target), _reference);
+ }
+}
+#endif // ARM_COMPUTE_ENABLE_SVE
+
+FIXTURE_DATA_TEST_CASE(RunBlock4, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock4(), make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+
+TEST_SUITE_END() // FP32
+
+TEST_SUITE_END() // ReorderLayer
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif // defined(__aarch64__)
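
Two details of the new ValidateReorderOHWIo8 case are worth spelling out. The expected output shapes pad the second dimension up to a multiple of the OHWIo8 block size of 8 (9 -> 16 and 301 -> 304, i.e. ceil(n / 8) * 8) while leaving the first dimension untouched. And the expected validation status is only true when arm_gemm::utils::get_vector_length<float>() returns 8, which at 32 bits per float corresponds to a 256-bit SVE implementation, presumably the vector width that the 8-wide blocking is meant to fill; on other vector lengths the layer is expected to reject the format.
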
diff --git a/tests/validation/NEON/ReshapeLayer.cpp b/tests/validation/NEON/ReshapeLayer.cpp
index bf39c399a5..e9f114d491 100644
--- a/tests/validation/NEON/ReshapeLayer.cpp
+++ b/tests/validation/NEON/ReshapeLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2018, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -77,6 +77,9 @@ input_info, output_info, expected)
template <typename T>
using NEReshapeLayerFixture = ReshapeLayerValidationFixture<Tensor, Accessor, NEReshapeLayer, T>;
+template <typename T>
+using NEReshapeLayerPaddedFixture = ReshapeLayerPaddedValidationFixture<Tensor, Accessor, NEReshapeLayer, T>;
+
TEST_SUITE(Float)
TEST_SUITE(F32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::F32)))
@@ -84,8 +87,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerFixture<float>, framework::Datase
// Validate output
validate(Accessor(_target), _reference);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() //F32
+TEST_SUITE_END() //Float
TEST_SUITE(Integer)
TEST_SUITE(S8)
@@ -94,7 +97,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerFixture<int8_t>, framework::Datas
// Validate output
validate(Accessor(_target), _reference);
}
-TEST_SUITE_END()
+TEST_SUITE_END() //S8
TEST_SUITE(S16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::S16)))
@@ -102,11 +105,41 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerFixture<int16_t>, framework::Data
// Validate output
validate(Accessor(_target), _reference);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() //S16
+TEST_SUITE_END() //Integer
+
+TEST_SUITE(Padded)
+TEST_SUITE(Float)
+TEST_SUITE(F32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerPaddedFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() //F32
+TEST_SUITE_END() //Float
+
+TEST_SUITE(Integer)
+TEST_SUITE(S8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerPaddedFixture<int8_t>, framework::DatasetMode::ALL, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::S8)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() //S8
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerPaddedFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::S16)))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() //S16
+TEST_SUITE_END() //Integer
+TEST_SUITE_END() //Padded
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() //ReshapeLayer
+TEST_SUITE_END() //NEON
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/NEON/Reverse.cpp b/tests/validation/NEON/Reverse.cpp
index 3dc3eeee80..7b5337f14b 100644
--- a/tests/validation/NEON/Reverse.cpp
+++ b/tests/validation/NEON/Reverse.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,8 @@ namespace validation
{
namespace
{
-auto run_small_dataset = combine(datasets::SmallShapes(), datasets::Tiny1DShapes());
+using framework::dataset::make;
+auto run_small_dataset = combine(datasets::Small3DShapes(), datasets::Tiny1DShapes());
auto run_large_dataset = combine(datasets::LargeShapes(), datasets::Tiny1DShapes());
} // namespace
@@ -53,28 +54,31 @@ TEST_SUITE(Reverse)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Invalid axis datatype
+ make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Invalid axis datatype
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid axis shape
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid axis length (> 4)
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Mismatching shapes
+ TensorInfo(TensorShape(32U, 13U, 17U, 3U, 2U), 1, DataType::U8), // Unsupported source dimensions (>4)
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(2U), 1, DataType::U8),
}),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8),
+ make("OutputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(2U, 13U, 2U), 1, DataType::U8),
+ TensorInfo(TensorShape(32U, 13U, 17U, 3U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
TensorInfo(TensorShape(2U), 1, DataType::U8),
})),
- framework::dataset::make("AxisInfo", { TensorInfo(TensorShape(3U), 1, DataType::U8),
+ make("AxisInfo", { TensorInfo(TensorShape(3U), 1, DataType::U8),
TensorInfo(TensorShape(2U, 10U), 1, DataType::U32),
TensorInfo(TensorShape(8U), 1, DataType::U32),
TensorInfo(TensorShape(2U), 1, DataType::U32),
TensorInfo(TensorShape(2U), 1, DataType::U32),
TensorInfo(TensorShape(2U), 1, DataType::U32),
+ TensorInfo(TensorShape(2U), 1, DataType::U32),
})),
- framework::dataset::make("Expected", { false, false, false, false, true, true})),
+ make("Expected", { false, false, false, false, false, true, true})),
src_info, dst_info, axis_info, expected)
{
Status s = NEReverse::validate(&src_info.clone()->set_is_resizable(false),
@@ -95,7 +99,11 @@ TEST_SUITE(F16)
FIXTURE_DATA_TEST_CASE(RunSmall,
NEReverseFixture<half>,
framework::DatasetMode::PRECOMMIT,
- combine(run_small_dataset, framework::dataset::make("DataType", DataType::F16)))
+ combine(
+ run_small_dataset,
+ make("DataType", DataType::F16),
+ make("use_negative_axis", { true, false }),
+ make("use_inverted_axis", { true, false })))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -104,7 +112,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
FIXTURE_DATA_TEST_CASE(RunLarge,
NEReverseFixture<half>,
framework::DatasetMode::NIGHTLY,
- combine(run_large_dataset, framework::dataset::make("DataType", DataType::F16)))
+ combine(
+ run_large_dataset,
+ make("DataType", DataType::F16),
+ make("use_negative_axis", { true, false }),
+ make("use_inverted_axis", { true, false })))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -116,7 +128,11 @@ TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall,
NEReverseFixture<float>,
framework::DatasetMode::PRECOMMIT,
- combine(run_small_dataset, framework::dataset::make("DataType", DataType::F32)))
+ combine(
+ run_small_dataset,
+ make("DataType", DataType::F32),
+ make("use_negative_axis", { true, false }),
+ make("use_inverted_axis", { true, false })))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -125,7 +141,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
FIXTURE_DATA_TEST_CASE(RunLarge,
NEReverseFixture<float>,
framework::DatasetMode::NIGHTLY,
- combine(run_large_dataset, framework::dataset::make("DataType", DataType::F32)))
+ combine(
+ run_large_dataset,
+ make("DataType", DataType::F32),
+ make("use_negative_axis", { true, false }),
+ make("use_inverted_axis", { true, false })))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -138,7 +158,11 @@ TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall,
NEReverseFixture<uint8_t>,
framework::DatasetMode::PRECOMMIT,
- combine(run_small_dataset, framework::dataset::make("DataType", DataType::QASYMM8)))
+ combine(
+ run_small_dataset,
+ make("DataType", DataType::QASYMM8),
+ make("use_negative_axis", { true, false }),
+ make("use_inverted_axis", { true, false })))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -147,7 +171,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
FIXTURE_DATA_TEST_CASE(RunLarge,
NEReverseFixture<uint8_t>,
framework::DatasetMode::NIGHTLY,
- combine(run_large_dataset, framework::dataset::make("DataType", DataType::QASYMM8)))
+ combine(
+ run_large_dataset,
+ make("DataType", DataType::QASYMM8),
+ make("use_negative_axis", { true, false }),
+ make("use_inverted_axis", { true, false })))
{
// Validate output
validate(Accessor(_target), _reference);
diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp
index 64427ae34f..f1209a21ac 100644
--- a/tests/validation/NEON/Scale.cpp
+++ b/tests/validation/NEON/Scale.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,16 +22,10 @@
* SOFTWARE.
*/
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEScale.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
#include "tests/NEON/Accessor.h"
-#include "tests/PaddingCalculator.h"
#include "tests/datasets/ScaleValidationDataset.h"
-#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
-#include "tests/validation/Helpers.h"
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/ScaleFixture.h"
@@ -51,7 +45,7 @@ using datasets::ScaleAlignCornersSamplingPolicySet;
/** We consider vector size in byte 64 since the maximum size of
* a vector used by the kernel is currently 64-byte (float32x4x4).
- * There are possibility to reduce test time further by using
+ * There is a possibility to reduce test time further by using
* smaller vector sizes for different data types where applicable.
*/
constexpr uint32_t vector_byte = 64;
@@ -62,25 +56,31 @@ constexpr uint32_t num_elements_per_vector()
return vector_byte / sizeof(T);
}
-/** Scale data types */
-const auto ScaleDataTypes = framework::dataset::make("DataType",
+/** Quantization information data set */
+const auto QuantizationInfoSet = framework::dataset::make("QuantizationInfo",
{
- DataType::U8,
- DataType::S16,
- DataType::F32,
+ QuantizationInfo(0.5f, -10),
});
/** Quantization information data set */
-const auto QuantizationInfoSet = framework::dataset::make("QuantizationInfo",
+const auto InputQuantizationInfoSet = framework::dataset::make("InputQuantizationInfo",
{
QuantizationInfo(0.5f, -10),
});
+/** Output quantization information data set */
+const auto OutputQuantizationInfoSet = framework::dataset::make("OutputQuantizationInfo",
+{
+ QuantizationInfo(0.2f, 20),
+});
+
/** Tolerance */
constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1);
+constexpr AbsoluteTolerance<int8_t> tolerance_s8(1);
constexpr AbsoluteTolerance<int16_t> tolerance_s16(1);
RelativeTolerance<float> tolerance_f32(0.05);
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+constexpr float abs_tolerance_f16(0.01f);
RelativeTolerance<half> tolerance_f16(half(0.1));
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
@@ -155,8 +155,6 @@ TEST_CASE(SupportDataType, framework::DatasetMode::ALL)
{ DataType::BFLOAT16, false },
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
{ DataType::F16, true },
-#else // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- { DataType::F16, false },
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
{ DataType::F32, true },
{ DataType::F64, false },
@@ -320,11 +318,14 @@ using NEScaleMixedDataLayoutFixture = ScaleValidationFixture<Tensor, Accessor, N
template <typename T>
using NEScaleQuantizedFixture = ScaleValidationQuantizedFixture<Tensor, Accessor, NEScale, T>;
template <typename T>
+using NEScaleDifferentOutputQuantizedFixture = ScaleValidationDifferentOutputQuantizedFixture<Tensor, Accessor, NEScale, T>;
+template <typename T>
using NEScaleQuantizedMixedDataLayoutFixture = ScaleValidationQuantizedFixture<Tensor, Accessor, NEScale, T, true>;
TEST_SUITE(Float)
TEST_SUITE(FP32)
-const auto f32_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<float>())), framework::dataset::make("DataType", DataType::F32));
+const auto f32_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<float>())), framework::dataset::make("DataType", DataType::F32));
+const auto f32_shape_nhwc = combine(datasets::Small3DShapes(), framework::dataset::make("DataType", DataType::F32));
FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<float>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f32_shape, ScaleSamplingPolicySet))
{
//Create valid region
@@ -352,10 +353,38 @@ FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<float>, framework::D
// Validate output
validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32);
}
+FIXTURE_DATA_TEST_CASE(RunMediumNHWC, NEScaleFixture<float>, framework::DatasetMode::ALL, ASSEMBLE_NHWC_DATASET(f32_shape_nhwc, ScaleSamplingPolicySet))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunMediumMixedDataLayoutNHWC, NEScaleMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, ASSEMBLE_NHWC_DATASET(f32_shape_nhwc, ScaleSamplingPolicySet))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunMediumAlignCornersNHWC, NEScaleFixture<float>, framework::DatasetMode::ALL, ASSEMBLE_NHWC_DATASET(f32_shape_nhwc, ScaleAlignCornersSamplingPolicySet))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32);
+}
TEST_SUITE_END() // FP32
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
-const auto f16_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<half>())), framework::dataset::make("DataType", DataType::F16));
+const auto f16_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<half>())), framework::dataset::make("DataType", DataType::F16));
+const auto f16_shape_nhwc = combine(datasets::Small3DShapes(), framework::dataset::make("DataType", DataType::F16));
FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f16_shape, ScaleSamplingPolicySet))
{
//Create valid region
@@ -363,7 +392,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<half>, framework::DatasetMode::A
const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
// Validate output
- validate(Accessor(_target), _reference, valid_region, tolerance_f16);
+ validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
}
FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f16_shape, ScaleAlignCornersSamplingPolicySet))
{
@@ -372,7 +401,34 @@ FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<half>, framework::Da
const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
// Validate output
- validate(Accessor(_target), _reference, valid_region, tolerance_f16);
+ validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunMediumNHWC, NEScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_NHWC_DATASET(f16_shape_nhwc, ScaleSamplingPolicySet))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunMediumMixedDataLayoutNHWC, NEScaleMixedDataLayoutFixture<half>, framework::DatasetMode::PRECOMMIT, ASSEMBLE_NHWC_DATASET(f16_shape_nhwc, ScaleSamplingPolicySet))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunMediumAlignCornersNHWC, NEScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_NHWC_DATASET(f16_shape_nhwc, ScaleAlignCornersSamplingPolicySet))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
}
TEST_SUITE_END() // FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
@@ -400,6 +456,27 @@ FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<uint8_t>, framework:
validate(Accessor(_target), _reference, valid_region, tolerance_u8);
}
TEST_SUITE_END() // U8
+TEST_SUITE(S8)
+const auto s8_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<int8_t>())), framework::dataset::make("DataType", DataType::S8));
+FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<int8_t>, framework::DatasetMode::ALL, ASSEMBLE_S8_DATASET(s8_shape, ScaleSamplingPolicySet))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(Accessor(_target), _reference, valid_region, tolerance_s8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<int8_t>, framework::DatasetMode::ALL, ASSEMBLE_S8_DATASET(s8_shape, ScaleAlignCornersSamplingPolicySet))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(Accessor(_target), _reference, valid_region, tolerance_s8);
+}
+TEST_SUITE_END() // S8
TEST_SUITE(S16)
const auto s16_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<int16_t>())), framework::dataset::make("DataType", DataType::S16));
FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<int16_t>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(s16_shape, ScaleSamplingPolicySet))
@@ -435,7 +512,18 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleQuantizedFixture<uint8_t>, framework::Da
// Validate output
validate(Accessor(_target), _reference, valid_region, tolerance_u8);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEScaleQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::ALL, ASSEMBLE_QUANTIZED_DATASET(qasymm8_shape, ScaleSamplingPolicySet, QuantizationInfoSet))
+FIXTURE_DATA_TEST_CASE(RunSmallDifferentOutputQuantization, NEScaleDifferentOutputQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
+ ASSEMBLE_DIFFERENTLY_QUANTIZED_DATASET(qasymm8_shape, ScaleSamplingPolicySet, InputQuantizationInfoSet, OutputQuantizationInfoSet))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(Accessor(_target), _reference, valid_region, tolerance_u8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEScaleQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::ALL, ASSEMBLE_QUANTIZED_DATASET(qasymm8_shape, ScaleSamplingPolicySet,
+ QuantizationInfoSet))
{
//Create valid region
TensorInfo src_info(_shape, 1, _data_type);
@@ -467,6 +555,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleQuantizedFixture<int8_t>, framework::Dat
// Validate output
validate(Accessor(_target), _reference, valid_region, tolerance_qasymm8_signed);
}
+FIXTURE_DATA_TEST_CASE(RunSmallDifferentOutputQuantization, NEScaleDifferentOutputQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
+ ASSEMBLE_DIFFERENTLY_QUANTIZED_DATASET(qasymm8_signed_shape, ScaleSamplingPolicySet, InputQuantizationInfoSet, OutputQuantizationInfoSet))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(Accessor(_target), _reference, valid_region, tolerance_qasymm8_signed);
+}
FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleQuantizedFixture<int8_t>, framework::DatasetMode::ALL, ASSEMBLE_QUANTIZED_DATASET(qasymm8_signed_shape, ScaleAlignCornersSamplingPolicySet,
QuantizationInfoSet))
{
diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp
index 2a9e30604e..94d0866c38 100644
--- a/tests/validation/NEON/SoftmaxLayer.cpp
+++ b/tests/validation/NEON/SoftmaxLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,21 +25,22 @@
#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuSoftmaxKernel.h"
#include "tests/NEON/Accessor.h"
-#include "tests/PaddingCalculator.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/framework/datasets/Datasets.h"
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/SoftmaxLayerFixture.h"
-
namespace arm_compute
{
namespace test
{
namespace validation
{
+using framework::dataset::make;
namespace
{
/** Tolerance for float operations */
@@ -51,7 +52,7 @@ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
/** CNN data types */
-const auto CNNDataTypes = framework::dataset::make("DataType",
+const auto CNNDataTypes = make("DataType",
{
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
DataType::F16,
@@ -62,56 +63,55 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
TEST_SUITE(NEON)
TEST_SUITE(SoftmaxLayer)
-
// *INDENT-OFF*
// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching data types
- TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching shapes
- TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8, // Invalid output quantization info
- QuantizationInfo(1.f/256, 12)),
- TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8,
- QuantizationInfo(1.f/256, 12)),
- TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, //Invalid axis high
- QuantizationInfo(1.f/256, 12)),
- TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, //Invalid axis low
- QuantizationInfo(1.f/256, 12)),
- }),
- framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U), 1, DataType::F16),
- TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
- TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8,
- QuantizationInfo(1.f/256, 12)),
- TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8,
- QuantizationInfo(1.f/256, 0)),
- TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
- TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8,
- QuantizationInfo(1.f/256, 0)),
- TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8,
- QuantizationInfo(1.f/256, 0)),
- })),
- framework::dataset::make("beta", { 1.0,
- 2.0,
- 1.0,
- 2.0,
- 1.0,
- 1.0,
- 2.0,
- 1.0,
- })),
- framework::dataset::make("axis", { 0,
- 0,
- 0,
- 1,
- 0,
- -1,
- 2,
- -3,
- })),
- framework::dataset::make("Expected", { false, false, false, true, true, true, false, false })),
- input_info, output_info, beta, axis, expected)
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+ make("InputInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching data types
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching shapes
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8, // Invalid output quantization info
+ QuantizationInfo(1.f/256, 12)),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8,
+ QuantizationInfo(1.f/256, 12)),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, //Invalid axis high
+ QuantizationInfo(1.f/256, 12)),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, //Invalid axis low
+ QuantizationInfo(1.f/256, 12)),
+ }),
+ make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U), 1, DataType::F16),
+ TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8,
+ QuantizationInfo(1.f/256, 12)),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8,
+ QuantizationInfo(1.f/256, 0)),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8,
+ QuantizationInfo(1.f/256, 0)),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8,
+ QuantizationInfo(1.f/256, 0)),
+ }),
+ make("beta", { 1.0,
+ 2.0,
+ 1.0,
+ 2.0,
+ 1.0,
+ 1.0,
+ 2.0,
+ 1.0,
+ }),
+ make("axis", { 0,
+ 0,
+ 0,
+ 1,
+ 0,
+ -1,
+ 2,
+ -3,
+ }),
+ make("Expected", { false, false, false, true, true, true, false, false })),
+ input_info, output_info, beta, axis, expected)
{
ARM_COMPUTE_EXPECT(bool(NESoftmaxLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), beta, axis)) == expected, framework::LogLevel::ERRORS);
}
@@ -121,29 +121,80 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
template <typename T>
using NESoftmaxLayerFixture = SoftmaxValidationFixture<Tensor, Accessor, NESoftmaxLayer, T>;
+DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL,
+ concat(
+ combine(
+ make("CpuExt", std::string("neon")),
+ make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED})
+ ),
+ combine(
+ make("CpuExt", std::string("sme2")),
+ make("DataType", { DataType::F32,
+ DataType::F16}))
+ ),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "neon");
+ cpu_isa.sme2 = (cpu_ext == "sme2");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
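+    // Query the preferred softmax kernel for the simulated ISA and check that its name matches the expected specialisation.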
+ const auto *selected_impl = CpuSoftmaxKernel::get_implementation(
+ SoftmaxKernelDataTypeISASelectorData{ data_type, cpu_isa, false /* is_log */, 0 /* axis */, CPUInfo::get().get_sme2_vector_length()},
+ cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = cpu_ext + "_" + cpu_impl_dt(data_type) + "_softmax";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
+
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(),
- framework::dataset::make("DataType", DataType::F16)),
- framework::dataset::make("Beta", { 1.0f, 2.0f })),
- framework::dataset::make("Axis", { 0, 1 })))
+FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
+ combine(
+ datasets::SoftmaxLayerSmallShapes(),
+ make("DataType", DataType::F16),
+ make("Beta", { 1.0f, 2.0f }),
+ make("Axis", { 0, -1 })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
+ combine(
+ datasets::SmallShapes(),
+ make("DataType", DataType::F16),
+ make("Beta", { 1.0f, 2.0f }),
+ make("Axis", { 0, 1 })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(),
- framework::dataset::make("DataType", DataType::F16)),
- framework::dataset::make("Beta", { 1.0f, 2.0f })),
- framework::dataset::make("Axis", { 0, 2, -1 })))
+FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
+ combine(
+ datasets::Small4DShapes(),
+ make("DataType", DataType::F16),
+ make("Beta", { 1.0f }),
+ make("Axis", { 0, 2, -1 })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(),
- framework::dataset::make("DataType", DataType::F16)),
- framework::dataset::make("Beta", { 1.0f, 2.0f })),
- framework::dataset::make("Axis", { 0 })))
+FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(
+ datasets::SoftmaxLayerLargeShapes(),
+ make("DataType", DataType::F16),
+ make("Beta", { 1.0f, 2.0f }),
+ make("Axis", { 0 })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f16);
@@ -152,26 +203,30 @@ TEST_SUITE_END() //FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(),
- framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("Beta", { 1.0f, 2.0f })),
- framework::dataset::make("Axis", { 0, -1 })))
+FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(
+ datasets::SoftmaxLayerSmallShapes(),
+ make("DataType", DataType::F32),
+ make("Beta", { 1.0f, 2.0f }),
+ make("Axis", { 0, -1 })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(),
- framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("Beta", { 1.0f, 2.0f })),
- framework::dataset::make("Axis", { 0, -2, 3 })))
+FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::Small4DShapes(),
+ make("DataType", DataType::F32),
+ make("Beta", { 1.0f, 2.0f }),
+ make("Axis", { 0, -2, 3 })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(),
- framework::dataset::make("DataType", DataType::F32)),
- framework::dataset::make("Beta", { 1.0f, 2.0f })),
- framework::dataset::make("Axis", { 0 })))
+FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(datasets::SoftmaxLayerLargeShapes(),
+ make("DataType", DataType::F32),
+ make("Beta", { 1.0f, 2.0f }),
+ make("Axis", { 0 })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f32);
@@ -184,29 +239,40 @@ using NESoftmaxLayerQuantizedFixture = SoftmaxValidationQuantizedFixture<Tensor,
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
- framework::dataset::make("Beta", { 1.0f, 2.f }))),
- framework::dataset::make("Axis", { 0, -1 })))
+FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
+ combine(
+ datasets::SoftmaxLayerSmallShapes(),
+ make("DataType", DataType::QASYMM8),
+ combine(
+ make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
+ make("Beta", { 1.0f, 2.f })
+ ),
+ make("Axis", { 0, -1 })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small4DShapes(),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
- framework::dataset::make("Beta", { 1.0f, 2.f }))),
- framework::dataset::make("Axis", { 0, 1, -2 })))
+FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
+ combine(
+ datasets::Small4DShapes(),
+ make("DataType", DataType::QASYMM8),
+ combine(
+ make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
+ make("Beta", { 1.0f, 2.f })),
+ make("Axis", { 0, 1, -2 })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
- framework::dataset::make("Beta", { 1.0f, 2.0f }))),
- framework::dataset::make("Axis", { 0 })))
+FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(
+ datasets::SoftmaxLayerLargeShapes(),
+ make("DataType", DataType::QASYMM8),
+ combine(
+ make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
+ make("Beta", { 1.0f, 2.0f })
+ ),
+ make("Axis", { 0 })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -214,20 +280,28 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerQuantizedFixture<uint8_t>, framew
TEST_SUITE_END() //QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
- framework::dataset::make("Beta", { 1.0f, 2.f }))),
- framework::dataset::make("Axis", { 0, -1 })))
+FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
+ combine(
+ datasets::SoftmaxLayerSmallShapes(),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ combine(
+ make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
+ make("Beta", { 1.0f, 2.f })
+ ),
+ make("Axis", { 0, -1 })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
-FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small4DShapes(),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
- framework::dataset::make("Beta", { 1.0f, 2.f }))),
- framework::dataset::make("Axis", { 0, 1, -1 })))
+FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
+ combine(
+ datasets::Small4DShapes(),
+ make("DataType", DataType::QASYMM8_SIGNED),
+ combine(
+ make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
+ make("Beta", { 1.0f, 2.f })
+ ),
+ make("Axis", { 0, 1, -1 })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
diff --git a/tests/validation/NEON/StackLayer.cpp b/tests/validation/NEON/StackLayer.cpp
index d88f713ccd..3828010c7b 100644
--- a/tests/validation/NEON/StackLayer.cpp
+++ b/tests/validation/NEON/StackLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,69 +44,74 @@ namespace test
{
namespace validation
{
+
+using framework::dataset::make;
namespace
{
// *INDENT-OFF*
// clang-format off
/** Data types */
-const auto data_types = framework::dataset::make("DataType", { DataType::QASYMM8, DataType::F16, DataType::F32 });
+const auto data_types = make("DataType", { DataType::QASYMM8, DataType::F16, DataType::F32 });
/** Num tensors values to test */
-const auto n_values = framework::dataset::make("NumTensors", { 3, 4 });
+const auto n_values = make("NumTensors", { 3, 4 });
/** Shapes 1D to test */
-const auto shapes_1d_small = combine(datasets::Small1DShapes(), framework::dataset::make("Axis", -1, 2));
+const auto shapes_1d_small = combine(datasets::Small1DShapes(), make("Axis", -1, 2));
/** Shapes 2D to test */
-const auto shapes_2d_small = combine(datasets::Small2DShapes(), framework::dataset::make("Axis", -2, 3));
+const auto shapes_2d_small = combine(datasets::Small2DShapes(), make("Axis", -2, 3));
/** Shapes 3D to test */
-const auto shapes_3d_small = combine(datasets::Small3DShapes(), framework::dataset::make("Axis", -3, 4));
+const auto shapes_3d_small = combine(datasets::Small3DShapes(), make("Axis", -3, 4));
/** Shapes 4D to test */
-const auto shapes_4d_small = combine(datasets::Small4DShapes(), framework::dataset::make("Axis", -4, 5));
+const auto shapes_4d_small = combine(datasets::Small4DShapes(), make("Axis", -4, 5));
/** Shapes 1D to test */
-const auto shapes_1d_large = combine(datasets::Large1DShapes(), framework::dataset::make("Axis", -1, 2));
+const auto shapes_1d_large = combine(datasets::Large1DShapes(), make("Axis", -1, 2));
/** Shapes 2D to test */
-const auto shapes_2d_large = combine(datasets::Medium2DShapes(), framework::dataset::make("Axis", -2, 3));
+const auto shapes_2d_large = combine(datasets::Medium2DShapes(), make("Axis", -2, 3));
/** Shapes 3D to test */
-const auto shapes_3d_large = combine(datasets::Medium3DShapes(), framework::dataset::make("Axis", -3, 4));
+const auto shapes_3d_large = combine(datasets::Medium3DShapes(), make("Axis", -3, 4));
/** Shapes 4D to test */
-const auto shapes_4d_large = combine(datasets::Medium4DShapes(), framework::dataset::make("Axis", -4, 5));
+const auto shapes_4d_large = combine(datasets::Medium4DShapes(), make("Axis", -4, 5));
} // namespace
/** Fixture to use */
template<typename T>
using NEStackLayerFixture = StackLayerValidationFixture<Tensor, ITensor, Accessor, NEStackLayer, T>;
+template<typename T>
+using NEStackLayerWithPaddingFixture = StackLayerWithPaddingValidationFixture<Tensor, ITensor, Accessor, NEStackLayer, T>;
+
using namespace arm_compute::misc::shape_calculator;
TEST_SUITE(NEON)
TEST_SUITE(StackLayer)
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
- framework::dataset::make("InputInfo",
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+make("InputInfo",
{
std::vector<TensorInfo>{ TensorInfo(TensorShape(9U, 8U), 1, DataType::U8) },
- std::vector<TensorInfo>{ TensorInfo(TensorShape(1U, 2U), 1, DataType::U8) , TensorInfo(TensorShape(1U, 2U), 1, DataType::U8), TensorInfo(TensorShape(1U, 2U), 1, DataType::U8)},
+ std::vector<TensorInfo>{ TensorInfo(TensorShape(1U, 2U), 1, DataType::U8) , TensorInfo(TensorShape(1U, 2U), 1, DataType::U8), TensorInfo(TensorShape(1U, 2U), 1, DataType::U8)},
std::vector<TensorInfo>{ TensorInfo(TensorShape(2U, 3U), 1, DataType::S32) },
- std::vector<TensorInfo>{ TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32), TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32)},
+ std::vector<TensorInfo>{ TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32), TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32)},
std::vector<TensorInfo>{ TensorInfo(TensorShape(9U, 8U), 1, DataType::S32) },
}),
-framework::dataset::make("OutputInfo",
+make("OutputInfo",
{
TensorInfo(TensorShape(1U, 9U, 8U), 1, DataType::U8), // Passes, stack 1 tensor on x axis
TensorInfo(TensorShape(1U, 3U, 2U), 1, DataType::U8), // Passes, stack 3 tensors on y axis
TensorInfo(TensorShape(1U, 2U, 3U), 1, DataType::S32), // fails axis < (- input's rank)
TensorInfo(TensorShape(3U, 7U, 5U), 1, DataType::S32), // fails, input dimensions > 4
TensorInfo(TensorShape(1U, 2U, 3U), 1, DataType::U8), // fails mismatching data types
-})),
-framework::dataset::make("Axis", { -3, 1, -4, -3, 1 })),
-framework::dataset::make("Expected", { true, true, false, false, false })),
+}),
+make("Axis", { -3, 1, -4, -3, 1 }),
+make("Expected", { true, true, false, false, false })),
input_info, output_info, axis, expected)
{
std::vector<TensorInfo> ti(input_info);
@@ -121,18 +126,18 @@ input_info, output_info, axis, expected)
TEST_SUITE(Shapes1D)
TEST_SUITE(S32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<int>, framework::DatasetMode::ALL,
- combine(combine(shapes_1d_small,
- framework::dataset::make("DataType", { DataType::S32 })),
- n_values))
+ combine(shapes_1d_small,
+ make("DataType", { DataType::S32 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<int>, framework::DatasetMode::NIGHTLY,
- combine(combine(shapes_1d_large,
- framework::dataset::make("DataType", { DataType::S32 })),
- n_values))
+ combine(shapes_1d_large,
+ make("DataType", { DataType::S32 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -141,18 +146,18 @@ TEST_SUITE_END() // S32
TEST_SUITE(S16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<short>, framework::DatasetMode::ALL,
- combine(combine(shapes_1d_small,
- framework::dataset::make("DataType", { DataType::S16 })),
- n_values))
+ combine(shapes_1d_small,
+ make("DataType", { DataType::S16 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<short>, framework::DatasetMode::NIGHTLY,
- combine(combine(shapes_1d_large,
- framework::dataset::make("DataType", { DataType::S16 })),
- n_values))
+ combine(shapes_1d_large,
+ make("DataType", { DataType::S16 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -161,18 +166,18 @@ TEST_SUITE_END() // S16
TEST_SUITE(S8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<char>, framework::DatasetMode::ALL,
- combine(combine(shapes_1d_small,
- framework::dataset::make("DataType", { DataType::S8 })),
- n_values))
+ combine(shapes_1d_small,
+ make("DataType", { DataType::S8 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<char>, framework::DatasetMode::NIGHTLY,
- combine(combine(shapes_1d_large,
- framework::dataset::make("DataType", { DataType::S8 })),
- n_values))
+ combine(shapes_1d_large,
+ make("DataType", { DataType::S8 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -183,18 +188,18 @@ TEST_SUITE_END() // Shapes1D
TEST_SUITE(Shapes2D)
TEST_SUITE(S32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<int>, framework::DatasetMode::ALL,
- combine(combine(shapes_2d_small,
- framework::dataset::make("DataType", { DataType::S32 })),
- n_values))
+ combine(shapes_2d_small,
+ make("DataType", { DataType::S32 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<int>, framework::DatasetMode::NIGHTLY,
- combine(combine(shapes_2d_large,
- framework::dataset::make("DataType", { DataType::S32 })),
- n_values))
+ combine(shapes_2d_large,
+ make("DataType", { DataType::S32 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -203,18 +208,18 @@ TEST_SUITE_END() // S32
TEST_SUITE(S16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<short>, framework::DatasetMode::ALL,
- combine(combine(shapes_2d_small,
- framework::dataset::make("DataType", { DataType::S16 })),
- n_values))
+ combine(shapes_2d_small,
+ make("DataType", { DataType::S16 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<short>, framework::DatasetMode::NIGHTLY,
- combine(combine(shapes_2d_large,
- framework::dataset::make("DataType", { DataType::S16 })),
- n_values))
+ combine(shapes_2d_large,
+ make("DataType", { DataType::S16 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -223,18 +228,18 @@ TEST_SUITE_END() // S16
TEST_SUITE(S8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<char>, framework::DatasetMode::ALL,
- combine(combine(shapes_2d_small,
- framework::dataset::make("DataType", { DataType::S8 })),
- n_values))
+ combine(shapes_2d_small,
+ make("DataType", { DataType::S8 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<char>, framework::DatasetMode::NIGHTLY,
- combine(combine(shapes_2d_large,
- framework::dataset::make("DataType", { DataType::S8 })),
- n_values))
+ combine(shapes_2d_large,
+ make("DataType", { DataType::S8 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -245,18 +250,18 @@ TEST_SUITE_END() // Shapes2D
TEST_SUITE(Shapes3D)
TEST_SUITE(S32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<int>, framework::DatasetMode::ALL,
- combine(combine(shapes_3d_small,
- framework::dataset::make("DataType", { DataType::S32 })),
- n_values))
+ combine(shapes_3d_small,
+ make("DataType", { DataType::S32 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<int>, framework::DatasetMode::NIGHTLY,
- combine(combine(shapes_3d_large,
- framework::dataset::make("DataType", { DataType::S32 })),
- n_values))
+ combine(shapes_3d_large,
+ make("DataType", { DataType::S32 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -265,18 +270,18 @@ TEST_SUITE_END() // S32
TEST_SUITE(S16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<short>, framework::DatasetMode::ALL,
- combine(combine(shapes_3d_small,
- framework::dataset::make("DataType", { DataType::S16 })),
- n_values))
+ combine(shapes_3d_small,
+ make("DataType", { DataType::S16 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<short>, framework::DatasetMode::NIGHTLY,
- combine(combine(shapes_3d_large,
- framework::dataset::make("DataType", { DataType::S16 })),
- n_values))
+ combine(shapes_3d_large,
+ make("DataType", { DataType::S16 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -285,18 +290,18 @@ TEST_SUITE_END() // S16
TEST_SUITE(S8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<char>, framework::DatasetMode::ALL,
- combine(combine(shapes_3d_small,
- framework::dataset::make("DataType", { DataType::S8 })),
- n_values))
+ combine(shapes_3d_small,
+ make("DataType", { DataType::S8 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<char>, framework::DatasetMode::NIGHTLY,
- combine(combine(shapes_3d_large,
- framework::dataset::make("DataType", { DataType::S8 })),
- n_values))
+ combine(shapes_3d_large,
+ make("DataType", { DataType::S8 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -307,18 +312,29 @@ TEST_SUITE_END() // Shapes3D
TEST_SUITE(Shapes4D)
TEST_SUITE(S32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<int>, framework::DatasetMode::ALL,
- combine(combine(shapes_4d_small,
- framework::dataset::make("DataType", { DataType::S32 })),
- n_values))
+ combine(shapes_4d_small,
+ make("DataType", { DataType::S32 }),
+ n_values))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+
+// Test the padded case only for 4D shapes and a single data type, because the underlying code
+// path depends only on the padding, which is not affected by the shape or data type.
+FIXTURE_DATA_TEST_CASE(RunSmallWithPadding, NEStackLayerWithPaddingFixture<int>, framework::DatasetMode::ALL,
+ combine(shapes_4d_small,
+ make("DataType", { DataType::S32 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<int>, framework::DatasetMode::NIGHTLY,
- combine(combine(shapes_4d_large,
- framework::dataset::make("DataType", { DataType::S32 })),
- n_values))
+ combine(shapes_4d_large,
+ make("DataType", { DataType::S32 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -327,18 +343,18 @@ TEST_SUITE_END() // S32
TEST_SUITE(S16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<short>, framework::DatasetMode::ALL,
- combine(combine(shapes_4d_small,
- framework::dataset::make("DataType", { DataType::S16 })),
- n_values))
+ combine(shapes_4d_small,
+ make("DataType", { DataType::S16 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<short>, framework::DatasetMode::NIGHTLY,
- combine(combine(shapes_4d_large,
- framework::dataset::make("DataType", { DataType::S16 })),
- n_values))
+ combine(shapes_4d_large,
+ make("DataType", { DataType::S16 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
@@ -347,24 +363,37 @@ TEST_SUITE_END() // S16
TEST_SUITE(S8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<char>, framework::DatasetMode::ALL,
- combine(combine(shapes_4d_small,
- framework::dataset::make("DataType", { DataType::S8 })),
- n_values))
+ combine(shapes_4d_small,
+ make("DataType", { DataType::S8 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<char>, framework::DatasetMode::NIGHTLY,
- combine(combine(shapes_4d_large,
- framework::dataset::make("DataType", { DataType::S8 })),
- n_values))
+ combine(shapes_4d_large,
+ make("DataType", { DataType::S8 }),
+ n_values))
{
// Validate output
validate(Accessor(_target), _reference);
}
TEST_SUITE_END() // S8
TEST_SUITE_END() // Shapes4D
+
+TEST_SUITE(HighDimensional)
+// The CPU implementation supports tensors with more than 4 dimensions, but the reference implementation does not, hence the test is disabled.
+FIXTURE_DATA_TEST_CASE(RunHighDimensional, NEStackLayerFixture<char>, framework::DatasetMode::DISABLED,
+ combine(make("Shape", { TensorShape{2U, 3U, 4U, 5U, 3U} }),
+ make("Axis", { 5, 0, -3, 2 }),
+ make("DataType", { DataType::S8 }),
+ make("NumTensors", { 3 })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // HighDimensional
TEST_SUITE_END() // StackLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/UNIT/RuntimeContext.cpp b/tests/validation/NEON/UNIT/RuntimeContext.cpp
index 819811943d..e126aded28 100644
--- a/tests/validation/NEON/UNIT/RuntimeContext.cpp
+++ b/tests/validation/NEON/UNIT/RuntimeContext.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,6 +48,24 @@ namespace validation
{
TEST_SUITE(NEON)
TEST_SUITE(UNIT)
+#if defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)
+TEST_CASE(CpuCapacity, framework::DatasetMode::ALL)
+{
+ CPUInfo& ci = arm_compute::Scheduler::get().cpu_info();
+ const uint32_t total_num_cpus = ci.get_cpu_num();
+ const uint32_t nonlittle_num_cpus = ci.get_cpu_num_excluding_little();
+ const bool has_lmb = ci.cpu_has_little_mid_big();
+ const uint32_t num_threads = arm_compute::Scheduler::get().num_threads();
+
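+    // On little/mid/big systems the scheduler is expected to skip the little cores, so the thread count should equal the number of non-little cores.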
+    if(has_lmb)
+    {
+        ARM_COMPUTE_EXPECT(total_num_cpus != nonlittle_num_cpus, framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(num_threads == nonlittle_num_cpus, framework::LogLevel::ERRORS);
+    }
+}
+#endif /* defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/
+
TEST_SUITE(RuntimeContext)
TEST_CASE(Scheduler, framework::DatasetMode::ALL)
diff --git a/tests/validation/NEON/UNIT/TensorAllocator.cpp b/tests/validation/NEON/UNIT/TensorAllocator.cpp
index d84bcd4a20..0aab9ef9b5 100644
--- a/tests/validation/NEON/UNIT/TensorAllocator.cpp
+++ b/tests/validation/NEON/UNIT/TensorAllocator.cpp
@@ -193,7 +193,7 @@ TEST_CASE(ImportMemoryMallocPadded, framework::DatasetMode::ALL)
ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
}
-#if !defined(BARE_METAL)
+#if !defined(_WIN64) && !defined(BARE_METAL)
TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL)
{
const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
@@ -250,7 +250,7 @@ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL)
tensor.allocator()->free();
ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
}
-#endif // !defined(BARE_METAL)
+#endif // !defined(_WIN64) && !defined(BARE_METAL)
TEST_CASE(AlignedAlloc, framework::DatasetMode::ALL)
{
diff --git a/tests/validation/UNIT/CPPScheduler.cpp b/tests/validation/UNIT/CPPScheduler.cpp
new file mode 100644
index 0000000000..6a3f6819fc
--- /dev/null
+++ b/tests/validation/UNIT/CPPScheduler.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CPP/CPPScheduler.h"
+
+#include "arm_compute/core/CPP/ICPPKernel.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+
+#include <stdexcept>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+
+namespace
+{
+class TestException: public std::exception
+{
+public:
+ const char* what() const noexcept override
+ {
+ return "Expected test exception";
+ }
+};
+
+class TestKernel: public ICPPKernel
+{
+public:
+ TestKernel()
+ {
+ Window window;
+ window.set(0, Window::Dimension(0, 2));
+ configure(window);
+ }
+
+ const char* name() const override
+ {
+ return "TestKernel";
+ }
+
+ void run(const Window &, const ThreadInfo &) override
+ {
+ throw TestException();
+ }
+
+};
+} // namespace
+
+TEST_SUITE(UNIT)
+TEST_SUITE(CPPScheduler)
+#if defined(ARM_COMPUTE_CPP_SCHEDULER) && !defined(BARE_METAL)
+TEST_CASE(RethrowException, framework::DatasetMode::ALL)
+{
+ CPPScheduler scheduler;
+ CPPScheduler::Hints hints(0);
+ TestKernel kernel;
+
+ scheduler.set_num_threads(2);
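+    // TestKernel::run() throws; the scheduler is expected to rethrow the exception on the calling thread.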
+ try
+ {
+ scheduler.schedule(&kernel, hints);
+ }
+ catch(const TestException&)
+ {
+ return;
+ }
+ ARM_COMPUTE_EXPECT_FAIL("Expected exception not caught", framework::LogLevel::ERRORS);
+}
+#endif // defined(ARM_COMPUTE_CPP_SCHEDULER) && !defined(BARE_METAL)
+TEST_SUITE_END()
+TEST_SUITE_END()
diff --git a/tests/validation/UNIT/GPUTarget.cpp b/tests/validation/UNIT/GPUTarget.cpp
index d2c81cf778..2e64635b7a 100644
--- a/tests/validation/UNIT/GPUTarget.cpp
+++ b/tests/validation/UNIT/GPUTarget.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,6 +37,7 @@ TEST_SUITE(GPUTarget)
TEST_CASE(GetGPUTargetFromName, framework::DatasetMode::ALL)
{
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-T000") == GPUTarget::MIDGARD, framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(get_target_from_name("Mali-T600") == GPUTarget::T600, framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(get_target_from_name("Mali-T700") == GPUTarget::T700, framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(get_target_from_name("Mali-T800") == GPUTarget::T800, framework::LogLevel::ERRORS);
@@ -45,15 +46,24 @@ TEST_CASE(GetGPUTargetFromName, framework::DatasetMode::ALL)
ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G51") == GPUTarget::G51, framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G51BIG") == GPUTarget::G51BIG, framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G51LIT") == GPUTarget::G51LIT, framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G52") == GPUTarget::G52, framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G52LIT") == GPUTarget::G52LIT, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G31") == GPUTarget::G31, framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G76") == GPUTarget::G76, framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G76 r0p0") == GPUTarget::G76, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G52") == GPUTarget::G52, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G52LIT") == GPUTarget::G52LIT, framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G77") == GPUTarget::G77, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G57") == GPUTarget::G57, framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G78") == GPUTarget::G78, framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G78AE") == GPUTarget::G78, framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(get_target_from_name("Mali-TODX") == GPUTarget::TODX, framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(get_target_from_name("Mali-T000") == GPUTarget::MIDGARD, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G68") == GPUTarget::G68, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G78AE") == GPUTarget::G78AE, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G710") == GPUTarget::G710, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G610") == GPUTarget::G610, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G510") == GPUTarget::G510, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G310") == GPUTarget::G310, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G715") == GPUTarget::G715, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G615") == GPUTarget::G615, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G720") == GPUTarget::G720, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G620") == GPUTarget::G620, framework::LogLevel::ERRORS);
}
TEST_CASE(GPUTargetIsIn, framework::DatasetMode::ALL)
diff --git a/tests/validation/UNIT/SubTensorInfo.cpp b/tests/validation/UNIT/SubTensorInfo.cpp
index 5a930620ce..ca5e46550c 100644
--- a/tests/validation/UNIT/SubTensorInfo.cpp
+++ b/tests/validation/UNIT/SubTensorInfo.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -69,6 +69,7 @@ TEST_CASE(SubTensorCreation, framework::DatasetMode::ALL)
* - A) Extend padding when SubTensor XY does not match parent tensor should fail
* B) Extend with zero padding when SubTensor XY does not match parent tensor should succeed
* - C) Extend padding when SubTensor XY matches parent tensor should succeed
+ * - D) Set lock padding to true so that extending the padding fails
*/
TEST_CASE(SubTensorPaddingExpansion, framework::DatasetMode::ALL)
{
@@ -95,6 +96,14 @@ TEST_CASE(SubTensorPaddingExpansion, framework::DatasetMode::ALL)
ARM_COMPUTE_EXPECT(tensor_info.padding().top == 2, framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(tensor_info.padding().right == 1, framework::LogLevel::ERRORS);
}
+
+ // Test D
+ {
+ TensorInfo tensor_info(TensorShape(23U, 17U, 3U), 1, DataType::F32);
+ SubTensorInfo sub_tensor_info(&tensor_info, TensorShape(4U, 3U, 1U), Coordinates(5, 2, 1));
+ sub_tensor_info.set_lock_paddings(true);
+ ARM_COMPUTE_EXPECT_THROW(sub_tensor_info.extend_padding(PaddingSize(2, 1)), framework::LogLevel::ERRORS);
+ }
}
TEST_SUITE_END() // SubTensorInfo
diff --git a/tests/validation/UNIT/TensorInfo.cpp b/tests/validation/UNIT/TensorInfo.cpp
index 50b26293c9..b79c1e9253 100644
--- a/tests/validation/UNIT/TensorInfo.cpp
+++ b/tests/validation/UNIT/TensorInfo.cpp
@@ -184,8 +184,17 @@ TEST_CASE(SymmPerChannelQuantizationInfo, framework::DatasetMode::ALL)
ARM_COMPUTE_EXPECT(info.quantization_info().offset().empty(), framework::LogLevel::ERRORS);
}
-TEST_SUITE_END() // TensorInfoValidation
-TEST_SUITE_END()
+/** Validates the lock paddings flag */
+TEST_CASE(SubTensorPaddingExpansion, framework::DatasetMode::ALL)
+{
+ TensorInfo tensor_info(TensorShape(23U, 17U, 3U), 1, DataType::F32);
+ tensor_info.set_lock_paddings(true);
+
+ // Lock padding is now set to true, therefore extending the padding should fail
+ ARM_COMPUTE_EXPECT_THROW(tensor_info.extend_padding(PaddingSize(2, 1)), framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // TensorInfo
+TEST_SUITE_END() // UNIT
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/Validation.h b/tests/validation/Validation.h
index f1ce0fecc7..289aca4d08 100644
--- a/tests/validation/Validation.h
+++ b/tests/validation/Validation.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,6 +45,17 @@ namespace test
{
namespace validation
{
+namespace
+{
+// Check whether two values are both infinities and "equal" (i.e. have the same sign)
+template <typename T>
+inline bool are_equal_infs(T val0, T val1)
+{
+ const auto same_sign = support::cpp11::signbit(val0) == support::cpp11::signbit(val1);
+ return (!support::cpp11::isfinite(val0)) && (!support::cpp11::isfinite(val1)) && same_sign;
+}
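+// Behaviour sketch of the helper above:
+//   are_equal_infs(+INFINITY, +INFINITY) -> true   (both infinite, same sign)
+//   are_equal_infs(+INFINITY, -INFINITY) -> false  (opposite signs)
+//   are_equal_infs(1.0f, +INFINITY)      -> false  (one operand is finite)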
+} // namespace
+
/** Class representing an absolute tolerance value. */
template <typename T>
class AbsoluteTolerance
@@ -140,7 +151,7 @@ bool compare_dimensions(const Dimensions<T> &dimensions1, const Dimensions<T> &d
{
ARM_COMPUTE_ERROR_ON(data_layout == DataLayout::UNKNOWN);
- if(data_layout == DataLayout::NCHW)
+ if(data_layout != DataLayout::NHWC)
{
if(dimensions1.num_dimensions() != dimensions2.num_dimensions())
{
@@ -296,9 +307,9 @@ struct compare<AbsoluteTolerance<U>> : public compare_base<AbsoluteTolerance<U>>
/** Perform comparison */
operator bool() const
{
- if(!support::cpp11::isfinite(this->_target) || !support::cpp11::isfinite(this->_reference))
+ if(are_equal_infs(this->_target, this->_reference))
{
- return false;
+ return true;
}
else if(this->_target == this->_reference)
{
@@ -322,9 +333,9 @@ struct compare<RelativeTolerance<U>> : public compare_base<RelativeTolerance<U>>
/** Perform comparison */
operator bool() const
{
- if(!support::cpp11::isfinite(this->_target) || !support::cpp11::isfinite(this->_reference))
+ if(are_equal_infs(this->_target, this->_reference))
{
- return false;
+ return true;
}
else if(this->_target == this->_reference)
{
@@ -494,9 +505,9 @@ void validate_wrap(const IAccessor &tensor, const SimpleTensor<T> &reference, co
// check for wrapping
if(!equal)
{
- if(!support::cpp11::isfinite(target_value) || !support::cpp11::isfinite(reference_value))
+ if(are_equal_infs(target_value, reference_value))
{
- equal = false;
+ equal = true;
}
else
{
diff --git a/tests/validation/cpu/unit/Context.cpp b/tests/validation/cpu/unit/Context.cpp
index 57ca866032..42247ba1da 100644
--- a/tests/validation/cpu/unit/Context.cpp
+++ b/tests/validation/cpu/unit/Context.cpp
@@ -94,13 +94,13 @@ TEST_CASE(CpuCapabilities, framework::DatasetMode::ALL)
opts.copts.capabilities = AclCpuCapabilitiesDot | AclCpuCapabilitiesMmlaInt8 | AclCpuCapabilitiesSve2;
arm_compute::cpu::CpuContext ctx(&opts.copts);
- ARM_COMPUTE_ASSERT(ctx.capabilities().dot == true);
- ARM_COMPUTE_ASSERT(ctx.capabilities().mmla_int8 == true);
- ARM_COMPUTE_ASSERT(ctx.capabilities().sve2 == true);
- ARM_COMPUTE_ASSERT(ctx.capabilities().fp16 == false);
+ ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_dotprod() == true);
+ ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_i8mm() == true);
+ ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_sve2() == true);
+ ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_fp16() == false);
arm_compute::cpu::CpuContext ctx_legacy(nullptr);
- ARM_COMPUTE_ASSERT(ctx_legacy.capabilities().neon == true);
+ ARM_COMPUTE_ASSERT(ctx_legacy.capabilities().cpu_info.has_neon() == true);
}
TEST_SUITE_END() // Context
diff --git a/tests/validation/dynamic_fusion/Utils.h b/tests/validation/dynamic_fusion/Utils.h
new file mode 100644
index 0000000000..72e9ec5955
--- /dev/null
+++ b/tests/validation/dynamic_fusion/Utils.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef TESTS_VALIDATION_DYNAMIC_FUSION_UTILS
+#define TESTS_VALIDATION_DYNAMIC_FUSION_UTILS
+
+#include "tests/AssetsLibrary.h"
+#include "utils/Utils.h"
+
+#include <chrono>
+#include <limits>
+#include <type_traits>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace utils
+{
+/** A set of macros which measure the wall clock time and record it into a map measurement_map under the name clock_name.
+ * TOCK records the time elapsed since the matching TICK; TOCK_AVG records the average over num_iterations.
+ */
+#define TICK(clock_name) \
+ auto clock_name##_tick = std::chrono::high_resolution_clock::now();
+#define TOCK(clock_name, measurement_map) \
+ auto clock_name##_tock = std::chrono::high_resolution_clock::now(); \
+ measurement_map["\"" #clock_name "\""] = duration_cast<microseconds>(clock_name##_tock - clock_name##_tick);
+#define TOCK_AVG(clock_name, measurement_map, num_iterations) \
+ auto clock_name##_tock = std::chrono::high_resolution_clock::now(); \
+ measurement_map["\"" #clock_name "\""] = duration_cast<microseconds>((clock_name##_tock - clock_name##_tick) / (num_iterations));
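+// Usage sketch (assumes the std::chrono names duration_cast/microseconds are visible at the call
+// site, and that the map, here called "timings", stores std::chrono::microseconds):
+//   std::map<std::string, std::chrono::microseconds> timings;
+//   TICK(run)
+//   runtime.run({&t_input, &t_weight, &t_dst});
+//   TOCK(run, timings)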
+
+template <typename T, typename U>
+void fill(U &&tensor, int seed, AssetsLibrary *library)
+{
+ static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
+ library->fill(tensor, distribution, seed);
+
+ // Fill the borders with infinity in order to check for the presence of NaN values (e.g. inf * 0)
+ DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
+ library->fill_borders_with_garbage(tensor, distribution_inf, seed);
+}
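+// Typical use from a test body, mirroring the integration tests below:
+//   fill<float>(CLAccessor(tensor), /*seed=*/0, library.get());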
+} // namespace utils
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif /* TESTS_VALIDATION_DYNAMIC_FUSION_UTILS */
diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp
new file mode 100644
index 0000000000..453983c077
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp
@@ -0,0 +1,642 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/QuantizationInfo.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/dynamic_fusion/Utils.h"
+#include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/ConvolutionLayer.h"
+#include "tests/validation/reference/DepthConvertLayer.h"
+#include "tests/validation/reference/DepthwiseConvolutionLayer.h"
+#include "tests/validation/reference/ElementwiseOperations.h"
+#include "tests/validation/reference/Permute.h"
+#include "tests/validation/reference/PixelWiseMultiplication.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+using namespace arm_compute::test::validation::utils;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(INTEGRATION)
+TEST_SUITE(DYNAMIC_FUSION)
+
+TEST_CASE(Conv2d, framework::DatasetMode::ALL)
+{
+ /* Computation:
+ * out = conv2d1x1(direct_conv)(input, weights, bias)
+ */
+ CLScheduler::get().default_reinit();
+
+ const auto data_type = DataType::F32;
+ const auto data_layout = DataLayout::NHWC;
+ const auto t_input_shape = TensorShape(384, 12, 12);
+ const auto t_weight_shape = TensorShape(384, 1, 1, 16);
+ const auto t_dst_shape = TensorShape(16, 12, 12);
+
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Fuse conv2d
+ Conv2dAttributes conv2d_attr{};
+ ITensorInfo *input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout);
+ ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout));
+
+ ITensorInfo *conv_out_info = GpuConv2d::create_op(sketch, input_info, weight_info, nullptr, conv2d_attr);
+
+ ITensorInfo *dst_info = context.create_tensor_info();
+ GpuOutput::create_op(sketch, conv_out_info, dst_info);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // (Important) Allocate auxiliary tensor memory if there are any
+ // Instead of using ACL allocated memory, the user can choose to import memory into the tensors
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ // auto buf = cl::Buffer();
+ // tensor->allocator()->import_memory(buf); // Or, import external memory
+ }
+
+ // Construct user tensors
+ CLTensor t_input{};
+ CLTensor t_weight{};
+ CLTensor t_dst{};
+
+ // Initialize user tensors
+ t_input.allocator()->init(*input_info);
+ t_weight.allocator()->init(*weight_info);
+ t_dst.allocator()->init(*dst_info);
+
+ // Allocate and fill user tensors
+ // Instead of using ACL allocator, the user can choose to import memory into the tensors
+ t_input.allocator()->allocate();
+ t_weight.allocator()->allocate();
+ t_dst.allocator()->allocate();
+ fill<float>(CLAccessor(t_input), 0, library.get());
+ fill<float>(CLAccessor(t_weight), 1, library.get());
+
+ // Run runtime
+ runtime.run({&t_input, &t_weight, &t_dst});
+
+ // Create reference
+ SimpleTensor<float> ref_t_input{t_input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC};
+ SimpleTensor<float> ref_t_weight{t_weight_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC};
+ SimpleTensor<float> ref_t_bias_placeholder{t_dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC};
+
+ // Fill reference
+ fill<float>(ref_t_input, 0, library.get());
+ fill<float>(ref_t_weight, 1, library.get());
+
+ auto ref_t_input_nchw = reference::permute(ref_t_input, PermutationVector(1U, 2U, 0U));
+ auto ref_t_weight_nchw = reference::permute(ref_t_weight, PermutationVector(1U, 2U, 0U));
+ auto ref_t_bias_placeholder_nchw = reference::permute(ref_t_bias_placeholder, PermutationVector(1U, 2U, 0U));
+ auto t_dst_shape_nchw = t_dst_shape;
+ permute(t_dst_shape_nchw, PermutationVector(1U, 2U, 0U));
+
+ PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left,
+ conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom,
+ DimensionRoundingType{});
+ auto ref_t_dst_nchw = reference::convolution_layer(ref_t_input_nchw, ref_t_weight_nchw, ref_t_bias_placeholder_nchw,
+ t_dst_shape_nchw, legacy_pad_stride, conv2d_attr.dilation());
+ const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U));
+
+ RelativeTolerance<float> tolerance_f32(
+ 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+ validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32);
+}
+
+TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL)
+{
+ /* Computation:
+ * out_0 = in_0 + in_1
+ * out_1 = out_0 + in_2
+ */
+ CLScheduler::get().default_reinit();
+
+ const auto data_type = DataType::F32;
+ const auto t_input_shape = TensorShape(33, 3, 2);
+
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ ITensorInfo *in_0_info = context.create_tensor_info(t_input_shape, 1, data_type);
+ ITensorInfo *in_1_info = context.create_tensor_info(t_input_shape, 1, data_type);
+ ITensorInfo *in_2_info = context.create_tensor_info(t_input_shape, 1, data_type);
+
+ ITensorInfo *out_0_info = context.create_tensor_info();
+ ITensorInfo *out_1_info = context.create_tensor_info();
+
+ ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, in_0_info, in_1_info);
+ GpuOutput::create_op(sketch, ans_0_info, out_0_info);
+ ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, in_2_info);
+ GpuOutput::create_op(sketch, ans_1_info, out_1_info);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // (Important) Allocate auxiliary tensor memory if there are any
+ // Instead of using ACL allocated memory, the user can choose to import memory into the tensors
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ // auto buf = cl::Buffer();
+ // tensor->allocator()->import_memory(buf); // Or, import external memory
+ }
+
+ // Construct user tensors
+ CLTensor t_in_0{};
+ CLTensor t_in_1{};
+ CLTensor t_in_2{};
+
+ CLTensor t_out_0{};
+ CLTensor t_out_1{};
+
+ // Initialize user tensors
+ t_in_0.allocator()->init(*in_0_info);
+ t_in_1.allocator()->init(*in_1_info);
+ t_in_2.allocator()->init(*in_2_info);
+
+ t_out_0.allocator()->init(*out_0_info);
+ t_out_1.allocator()->init(*out_1_info);
+
+ // Allocate and fill user tensors
+ // Instead of using ACL allocator, the user can choose to import memory into the tensors
+ t_in_0.allocator()->allocate();
+ t_in_1.allocator()->allocate();
+ t_in_2.allocator()->allocate();
+
+ t_out_0.allocator()->allocate();
+ t_out_1.allocator()->allocate();
+
+ fill<float>(CLAccessor(t_in_0), 0, library.get());
+ fill<float>(CLAccessor(t_in_1), 1, library.get());
+ fill<float>(CLAccessor(t_in_2), 2, library.get());
+
+ // Run runtime
+ runtime.run({&t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1});
+
+ // Create reference
+ SimpleTensor<float> ref_t_in_0{t_input_shape, data_type, 1, QuantizationInfo()};
+ SimpleTensor<float> ref_t_in_1{t_input_shape, data_type, 1, QuantizationInfo()};
+ SimpleTensor<float> ref_t_in_2{t_input_shape, data_type, 1, QuantizationInfo()};
+
+ SimpleTensor<float> ref_t_out_0{t_input_shape, data_type, 1, QuantizationInfo()};
+ SimpleTensor<float> ref_t_out_1{t_input_shape, data_type, 1, QuantizationInfo()};
+
+ // Fill reference
+ fill<float>(ref_t_in_0, 0, library.get());
+ fill<float>(ref_t_in_1, 1, library.get());
+ fill<float>(ref_t_in_2, 2, library.get());
+
+ reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_in_0, ref_t_in_1, ref_t_out_0, ConvertPolicy::WRAP);
+ reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_out_1,
+ ConvertPolicy::WRAP);
+
+ RelativeTolerance<float> tolerance_f32(
+ 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+ validate(CLAccessor(t_out_0), ref_t_out_0, tolerance_f32);
+ validate(CLAccessor(t_out_1), ref_t_out_1, tolerance_f32);
+}
+TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL)
+{
+ /* Computation:
+ * out_0 = in_0 + in_1
+ * out_1 = float(int32_t(out_0 + in_2))
+ */
+ CLScheduler::get().default_reinit();
+
+ const auto data_type = DataType::F32;
+ const auto t_input_shape = TensorShape(3, 8, 5);
+
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ ITensorInfo *in_0_info = context.create_tensor_info(t_input_shape, 1, data_type);
+ ITensorInfo *in_1_info = context.create_tensor_info(t_input_shape, 1, data_type);
+ ITensorInfo *in_2_info = context.create_tensor_info(t_input_shape, 1, data_type);
+
+ ITensorInfo *out_0_info = context.create_tensor_info();
+ ITensorInfo *out_1_info = context.create_tensor_info();
+
+ CastAttributes cast_0_attr;
+ cast_0_attr.data_type(DataType::F16);
+
+ CastAttributes cast_1_attr;
+ cast_1_attr.data_type(DataType::F32);
+
+ ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, in_0_info, in_1_info);
+ GpuOutput::create_op(sketch, ans_0_info, out_0_info);
+ ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, in_2_info);
+ ITensorInfo *ans_2_info = GpuCast::create_op(sketch, ans_1_info, cast_0_attr);
+ ITensorInfo *ans_3_info = GpuCast::create_op(sketch, ans_2_info, cast_1_attr);
+ GpuOutput::create_op(sketch, ans_3_info, out_1_info);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // (Important) Allocate auxiliary tensor memory if there are any
+ // Instead of using ACL allocated memory, the user can choose to import memory into the tensors
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ // auto buf = cl::Buffer();
+ // tensor->allocator()->import_memory(buf); // Or, import external memory
+ }
+
+ // Construct user tensors
+ CLTensor t_in_0{};
+ CLTensor t_in_1{};
+ CLTensor t_in_2{};
+
+ CLTensor t_out_0{};
+ CLTensor t_out_1{};
+
+ // Initialize user tensors
+ t_in_0.allocator()->init(*in_0_info);
+ t_in_1.allocator()->init(*in_1_info);
+ t_in_2.allocator()->init(*in_2_info);
+
+ t_out_0.allocator()->init(*out_0_info);
+ t_out_1.allocator()->init(*out_1_info);
+
+ // Allocate and fill user tensors
+ // Instead of using ACL allocator, the user can choose to import memory into the tensors
+ t_in_0.allocator()->allocate();
+ t_in_1.allocator()->allocate();
+ t_in_2.allocator()->allocate();
+
+ t_out_0.allocator()->allocate();
+ t_out_1.allocator()->allocate();
+
+ fill<float>(CLAccessor(t_in_0), 0, library.get());
+ fill<float>(CLAccessor(t_in_1), 1, library.get());
+ fill<float>(CLAccessor(t_in_2), 2, library.get());
+
+ // Run runtime
+ runtime.run({&t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1});
+
+ // Create reference
+ SimpleTensor<float> ref_t_in_0{t_input_shape, data_type, 1, QuantizationInfo()};
+ SimpleTensor<float> ref_t_in_1{t_input_shape, data_type, 1, QuantizationInfo()};
+ SimpleTensor<float> ref_t_in_2{t_input_shape, data_type, 1, QuantizationInfo()};
+
+ SimpleTensor<float> ref_t_out_0{t_input_shape, data_type, 1, QuantizationInfo()};
+ SimpleTensor<float> ref_t_ans_1{t_input_shape, data_type, 1, QuantizationInfo()};
+
+ // Fill reference
+ fill<float>(ref_t_in_0, 0, library.get());
+ fill<float>(ref_t_in_1, 1, library.get());
+ fill<float>(ref_t_in_2, 2, library.get());
+
+ reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_in_0, ref_t_in_1, ref_t_out_0, ConvertPolicy::WRAP);
+ reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_ans_1,
+ ConvertPolicy::WRAP);
+ const auto ref_t_ans_2 =
+ reference::depth_convert<float, int32_t>(ref_t_ans_1, DataType::S32, ConvertPolicy::SATURATE, 0);
+ const auto ref_t_out_1 =
+ reference::depth_convert<int32_t, float>(ref_t_ans_2, DataType::F32, ConvertPolicy::SATURATE, 0);
+
+ RelativeTolerance<float> tolerance_add_f32(0.001f);
+ AbsoluteTolerance<float> tolerance_cast_f32(1.0f);
+ validate(CLAccessor(t_out_0), ref_t_out_0, tolerance_add_f32);
+ validate(CLAccessor(t_out_1), ref_t_out_1, tolerance_cast_f32);
+}
+
+/// TODO: COMPMID-6593 : This integration test fails with the CKW backend.
+/// It was not enabled for CKW before, so the failure went unnoticed.
+TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::DISABLED)
+{
+ // (tensor0)
+ // |
+ // ======|============================================== Sketch 0
+ // | (tensor1) +---- (tensor2)
+ // | | | |
+ // +-- input -- weights -- biases --+ |
+ // | | |
+ // | Conv2d | |
+ // | | |
+ // +----------- output -------------+ |
+ // | |
+ // +-- input ---+ |
+ // | | |
+ // | Sigmoid | |
+ // | | |
+ // +-- output --+ |
+ // | |
+ // +-- input ---+ |
+ // | | |
+ // | Output | |
+ // | | |
+ // +-- output --+ |
+ // | |
+ // (tensor5) |
+ // | |
+ // +--------+ |
+ // ======|=============================|================ Sketch 1
+ // | (tensor3) (tensor4) |
+ // | | | |
+ // +-- input -- weights -- biases --+ |
+ // | | |
+ // | DepthwiseConv2d | |
+ // | | |
+ // +----------- output -------------+ |
+ // | |
+ // +--+ +----------------+
+ // | |
+ // +-- lhs -- rhs --+
+ // | |
+ // | Multiply |
+ // | |
+ // +---- output ----+
+ // |
+ // +-- input ---+
+ // | |
+ // | Output |
+ // | |
+ // +-- output --+
+ // |
+ // (tensor6)
+
+ TensorShape conv2d_src_shape(10, 20, 30);
+ TensorShape conv2d_wei_shape(10, 3, 3, 5);
+ TensorShape conv2d_bia_shape(5);
+ TensorShape conv2d_dst_shape(5, 18, 28);
+ TensorShape dwc_wei_shape(5, 3, 3);
+ TensorShape dwc_bia_shape(5);
+ TensorShape dwc_dst_shape(5, 16, 26);
+
+ // Initialize the context.
+ CLScheduler::get().default_reinit();
+
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context(&cl_compile_ctx);
+
+ auto tensor0_info = context.create_tensor_info(conv2d_src_shape, 1, DataType::F32, DataLayout::NHWC);
+
+ // Create the first sketch: conv2d + sigmoid + output.
+ GpuWorkloadSketch sketch0(&context);
+
+ Conv2dAttributes conv2d_attr;
+ auto tensor1_info = context.create_tensor_info(conv2d_wei_shape, 1, DataType::F32, DataLayout::NHWC);
+ auto tensor2_info = context.create_tensor_info(conv2d_bia_shape, 1, DataType::F32, DataLayout::NHWC);
+ ARM_COMPUTE_EXPECT(GpuConv2d::validate_op(sketch0, tensor0_info, tensor1_info, tensor2_info, conv2d_attr),
+ framework::LogLevel::ERRORS);
+ auto ans_info = GpuConv2d::create_op(sketch0, tensor0_info, tensor1_info, tensor2_info, conv2d_attr);
+
+ ARM_COMPUTE_EXPECT(GpuSigmoid::validate_op(sketch0, ans_info), framework::LogLevel::ERRORS);
+ ans_info = GpuSigmoid::create_op(sketch0, ans_info);
+
+ DepthwiseConv2dAttributes dwc_attr;
+ auto tensor3_info = context.create_tensor_info(dwc_wei_shape, 1, DataType::F32, DataLayout::NHWC);
+ auto tensor4_info = context.create_tensor_info(dwc_bia_shape, 1, DataType::F32, DataLayout::NHWC);
+ ARM_COMPUTE_EXPECT(!GpuDepthwiseConv2d::validate_op(sketch0, ans_info, tensor3_info, tensor4_info, dwc_attr),
+ framework::LogLevel::ERRORS);
+
+ auto tensor5_info = context.create_tensor_info();
+ ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch0, ans_info, tensor5_info), framework::LogLevel::ERRORS);
+ GpuOutput::create_op(sketch0, ans_info, tensor5_info);
+
+ // Create the first workload runtime.
+ ClWorkloadRuntime runtime0;
+ runtime0.configure(sketch0);
+
+ // Create the second sketch: dwc + mul + output.
+ GpuWorkloadSketch sketch1(&context);
+
+ ARM_COMPUTE_EXPECT(GpuDepthwiseConv2d::validate_op(sketch1, tensor5_info, tensor3_info, tensor4_info, dwc_attr),
+ framework::LogLevel::ERRORS);
+ ans_info = GpuDepthwiseConv2d::create_op(sketch1, tensor5_info, tensor3_info, tensor4_info, dwc_attr);
+
+ ARM_COMPUTE_EXPECT(GpuMul::validate_op(sketch1, ans_info, tensor2_info), framework::LogLevel::ERRORS);
+ ans_info = GpuMul::create_op(sketch1, ans_info, tensor2_info);
+
+ auto tensor6_info = context.create_tensor_info();
+ ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch1, ans_info, tensor6_info), framework::LogLevel::ERRORS);
+ GpuOutput::create_op(sketch1, ans_info, tensor6_info);
+
+ // Create the second workload runtime.
+ ClWorkloadRuntime runtime1;
+ runtime1.configure(sketch1);
+
+ // Create the user tensors.
+ CLTensor tensor0;
+ CLTensor tensor1;
+ CLTensor tensor2;
+ CLTensor tensor3;
+ CLTensor tensor4;
+ CLTensor tensor5;
+ CLTensor tensor6;
+
+ tensor0.allocator()->init(*tensor0_info);
+ tensor1.allocator()->init(*tensor1_info);
+ tensor2.allocator()->init(*tensor2_info);
+ tensor3.allocator()->init(*tensor3_info);
+ tensor4.allocator()->init(*tensor4_info);
+ tensor5.allocator()->init(*tensor5_info);
+ tensor6.allocator()->init(*tensor6_info);
+
+ tensor0.allocator()->allocate();
+ tensor1.allocator()->allocate();
+ tensor2.allocator()->allocate();
+ tensor3.allocator()->allocate();
+ tensor4.allocator()->allocate();
+ tensor5.allocator()->allocate();
+ tensor6.allocator()->allocate();
+
+ // Allocate the auxiliary tensors.
+ for (auto &data : runtime0.get_auxiliary_tensors())
+ {
+ auto tensor = std::get<0>(data);
+ auto &tensor_info = std::get<1>(data);
+ auto mem_req = std::get<2>(data);
+
+ tensor->allocator()->init(tensor_info, mem_req.alignment);
+ tensor->allocator()->allocate();
+ }
+
+ for (auto &data : runtime1.get_auxiliary_tensors())
+ {
+ auto tensor = std::get<0>(data);
+ auto &tensor_info = std::get<1>(data);
+ auto mem_req = std::get<2>(data);
+
+ tensor->allocator()->init(tensor_info, mem_req.alignment);
+ tensor->allocator()->allocate();
+ }
+
+ // Fill the input tensors with random data.
+ fill<float>(CLAccessor(tensor0), 0, library.get());
+ fill<float>(CLAccessor(tensor1), 1, library.get());
+ fill<float>(CLAccessor(tensor2), 2, library.get());
+ fill<float>(CLAccessor(tensor3), 3, library.get());
+ fill<float>(CLAccessor(tensor4), 4, library.get());
+
+ // Run each runtime.
+ runtime0.run({&tensor0, &tensor1, &tensor2, &tensor5});
+ runtime1.run({&tensor5, &tensor3, &tensor4, &tensor2, &tensor6});
+
+ // Compute the reference result.
+ SimpleTensor<float> ref_conv2d_src(conv2d_src_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC);
+ SimpleTensor<float> ref_conv2d_wei(conv2d_wei_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC);
+ SimpleTensor<float> ref_conv2d_bia(conv2d_bia_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC);
+ SimpleTensor<float> ref_dwc_wei(dwc_wei_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC);
+ SimpleTensor<float> ref_dwc_bia(dwc_bia_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC);
+
+ fill<float>(ref_conv2d_src, 0, library.get());
+ fill<float>(ref_conv2d_wei, 1, library.get());
+ fill<float>(ref_conv2d_bia, 2, library.get());
+ fill<float>(ref_dwc_wei, 3, library.get());
+ fill<float>(ref_dwc_bia, 4, library.get());
+
+ PermutationVector nhwc_to_nchw(1, 2, 0);
+
+ auto conv2d_dst_shape_nchw = conv2d_dst_shape;
+ permute(conv2d_dst_shape_nchw, nhwc_to_nchw);
+ const auto ref_conv2d_src_nchw = reference::permute(ref_conv2d_src, nhwc_to_nchw);
+ const auto ref_conv2d_wei_nchw = reference::permute(ref_conv2d_wei, nhwc_to_nchw);
+ const auto ref_conv2d_bia_nchw = reference::permute(ref_conv2d_bia, nhwc_to_nchw);
+ const auto ref_conv2d_dst_nchw = reference::convolution_layer(
+ ref_conv2d_src_nchw, ref_conv2d_wei_nchw, ref_conv2d_bia_nchw, conv2d_dst_shape_nchw, PadStrideInfo());
+
+ const auto ref_sigmoid_dst_nchw = reference::activation_layer(
+ ref_conv2d_dst_nchw, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+
+ auto dwc_dst_shape_nchw = dwc_dst_shape;
+ permute(dwc_dst_shape_nchw, nhwc_to_nchw);
+ const auto ref_dwc_wei_nchw = reference::permute(ref_dwc_wei, nhwc_to_nchw);
+ const auto ref_dwc_bia_nchw = reference::permute(ref_dwc_bia, nhwc_to_nchw);
+ const auto ref_dwc_dst_nchw = reference::depthwise_convolution(
+ ref_sigmoid_dst_nchw, ref_dwc_wei_nchw, ref_dwc_bia_nchw, dwc_dst_shape_nchw, PadStrideInfo(), 1);
+
+ const auto ref_mul_dst_nchw = reference::pixel_wise_multiplication<float, float, float>(
+ ref_dwc_dst_nchw, ref_conv2d_bia_nchw, 1.0, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP,
+ DataType::F32);
+
+ constexpr RelativeTolerance<float> tolerance(0.001f);
+ validate(CLAccessor(tensor6), ref_mul_dst_nchw, tolerance);
+}
+
+TEST_SUITE(Invalid_Fusion_Should_Fail)
+TEST_CASE(Multiple_Complex_Ops_0, framework::DatasetMode::ALL)
+{
+ /* Computation:
+ * out = conv2d(conv2d(l0_input, l0_weight), l1_weight)
+ */
+ CLScheduler::get().default_reinit();
+
+ const auto data_type = DataType::F32;
+ const auto data_layout = DataLayout::NHWC;
+ const auto t_input_shape = TensorShape(384, 12, 12);
+ const auto t_weight_shape = TensorShape(384, 1, 1, 16);
+ auto t_input_info = TensorInfo(t_input_shape, 1, data_type, data_layout);
+ auto t_weight_info = TensorInfo(t_weight_shape, 1, data_type, data_layout);
+ auto t_dst_info = TensorInfo();
+
+ Conv2dAttributes conv2d_attr{};
+
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Create tensor infos
+ ITensorInfo *input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout);
+ ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout));
+ ITensorInfo *dst_info;
+
+ // Fuse conv2d into the workload
+ {
+ // Validate operator
+ const Status success = GpuConv2d::validate_op(sketch, input_info, weight_info, nullptr, conv2d_attr);
+ ARM_COMPUTE_EXPECT(bool(success), framework::LogLevel::ERRORS);
+
+ dst_info = GpuConv2d::create_op(sketch, input_info, weight_info, nullptr, conv2d_attr);
+ }
+
+ // Create tensor infos
+ ITensorInfo *weight_info_2 = context.create_tensor_info(t_weight_info);
+
+ // Fuse conv2d into the workload
+ {
+ // Validate operator, should fail
+ const Status success = GpuConv2d::validate_op(sketch, dst_info, weight_info_2, nullptr, conv2d_attr);
+ const auto expected_error_str = "Operator fusion test failed. This operator cannot be fused into the workload";
+
+ ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT((success.error_description().find(expected_error_str) != std::string::npos),
+ framework::LogLevel::ERRORS);
+ }
+}
+TEST_SUITE_END() // Invalid_Fusion_Should_Fail
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // INTEGRATION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp
new file mode 100644
index 0000000000..9bfdc961fe
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/DynamicFusionDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/* Synced with tests/validation/CL/ArithmeticAddition.cpp from the standard interface.
+ *
+ * Difference | Why the difference
+ * No quantized tests | Not supported yet
+ * No in place tests | Not supported yet
+ * No activation tests | Not needed in dynamic fusion interface
+ *
+ */
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(ADD)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+ framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // S16 is a valid data type for Add
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // S32 is a valid data type for Add
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
+ TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8_SIGNED
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed
+ TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::S16), // Broadcast Z dimension is not allowed
+ TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed
+ }),
+ framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32),
+ TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8_SIGNED
+ TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs
+ TensorInfo(TensorShape(15U, 1U, 3U), 1, DataType::F32),
+ TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::S16),
+ TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32),
+ })),
+ framework::dataset::make("Expected", { true, false, true, true, false, true, false, false, true, false, false, true})),
+ input1_info, input2_info, expected)
+{
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &context };
+
+ // Validate Elementwise Add
+ auto lhs_info = context.create_tensor_info(input1_info);
+ auto rhs_info = context.create_tensor_info(input2_info);
+
+ bool res = bool(GpuAdd::validate_op(sketch, lhs_info, rhs_info));
+ ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+constexpr AbsoluteTolerance<float> tolerance_f(
+ 0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 and DataType::F16 */
+constexpr float tolerance_num = 0.0001f; /**< Tolerance number */
+
+template <typename T>
+using DynamicFusionCLAddFixture =
+ DynamicFusionGpuElementwiseBinaryOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>;
+
+template <typename T>
+using DynamicFusionCLAddBroadcastFixture =
+ DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>;
+
+template <typename T>
+using DynamicFusionCLAddTwoOpsFixture =
+ DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture<CLTensor, CLAccessor, GpuAdd, T>;
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionCLAddFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeOneOp,
+ DynamicFusionCLAddFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}),
+ datasets::LargeShapes()),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp,
+ DynamicFusionCLAddBroadcastFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}),
+ datasets::TemporaryLimitedSmallShapesBroadcast()),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp,
+ DynamicFusionCLAddBroadcastFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}),
+ datasets::TemporaryLimitedLargeShapesBroadcast()),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f);
+}
+FIXTURE_DATA_TEST_CASE(
+ RunSmallTwoOps,
+ DynamicFusionCLAddTwoOpsFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}),
+ datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes()),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})),
+ framework::dataset::make("FuseTwoOps", {true})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionCLAddFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", {DataType::F16})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f, tolerance_num);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp,
+ DynamicFusionCLAddBroadcastFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}),
+ datasets::TemporaryLimitedSmallShapesBroadcast()),
+ framework::dataset::make("DataType", {DataType::F16})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f, tolerance_num);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionCLAddFixture<int32_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", {DataType::S32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // S32
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionCLAddFixture<int16_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", {DataType::S16})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionCLAddFixture<int16_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}),
+ datasets::LargeShapes()),
+ framework::dataset::make("DataType", {DataType::S16})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // S16
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionCLAddFixture<uint8_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", {DataType::U8})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // U8
+
+TEST_SUITE_END() // ADD
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Cast.cpp b/tests/validation/dynamic_fusion/gpu/cl/Cast.cpp
new file mode 100644
index 0000000000..4ef359e74d
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Cast.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+// Tolerance
+constexpr AbsoluteTolerance<float> zero_tolerance(0);
+
+/** Input data sets **/
+
+// F16
+const auto CastF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32));
+
+// F32
+const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
+
+class DFConvertPolicies final : public framework::dataset::ContainerDataset<std::vector<ConvertPolicy>>
+{
+public:
+ DFConvertPolicies()
+ : ContainerDataset("ConvertPolicy",
+ {
+ ConvertPolicy::WRAP
+ })
+ {
+ }
+};
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(CAST)
+
+template <typename T>
+using DynamicFusionCLCastToF16Fixture = DynamicFusionCastValidationFixture<CLTensor, CLAccessor, GpuCast, T, half>;
+template <typename T>
+using DynamicFusionCLCastToF32Fixture = DynamicFusionCastValidationFixture<CLTensor, CLAccessor, GpuCast, T, float>;
+
+#define CAST_SUITE(NAME, idt, odt, type, dataset, tolerance) \
+ TEST_SUITE(NAME) \
+ FIXTURE_DATA_TEST_CASE(RunSmall, type, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), dataset), \
+ DFConvertPolicies())) \
+ { \
+ validate(CLAccessor(_target), _reference, tolerance); \
+ } \
+ TEST_SUITE_END()
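+
+// Each CAST_SUITE(...) instantiation below creates a small precommit suite that runs the given
+// fixture type over SmallShapes combined with the input/output data type pair, using WRAP as the
+// only convert policy, and validates the result against the reference with the given tolerance.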
+
+// F16
+CAST_SUITE(F16_to_F32, DataType::F16, DataType::F32, DynamicFusionCLCastToF32Fixture<half>, CastF16toF32Dataset, zero_tolerance)
+
+// F32
+CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, DynamicFusionCLCastToF16Fixture<float>, CastF32toF16Dataset, zero_tolerance)
+
+TEST_SUITE_END() // CAST
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp b/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp
new file mode 100644
index 0000000000..cef8b87c3f
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr float epsilon = 1e-6f;
+constexpr AbsoluteTolerance<float> tolerance(epsilon);
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(CLAMP)
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Minimum value larger than maximum value
+ }),
+ framework::dataset::make("MinVal", { 0.2f,
+ 1.5f,
+ 9.0f,
+ })),
+ framework::dataset::make("MaxVal", { 0.5f,
+ 2.0f,
+ 1.0f,
+ })),
+ framework::dataset::make("Expected", { true, true, false })),
+ input_info, min_val, max_val, expected)
+{
+ // Create a new workload sketch
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &context };
+
+    // Validate Clamp
+ const ITensorInfo* src_info = context.create_tensor_info(input_info);
+
+ ClampAttributes attributes {};
+ attributes.min_val(min_val)
+ .max_val(max_val);
+
+ const bool res = static_cast<bool>(GpuClamp::validate_op(sketch, src_info, attributes));
+ ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using DynamicFusionClampOpFixture = DynamicFusionClampValidationFixture<CLTensor, CLAccessor, GpuClamp, T>;
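+// Naming convention of the cases below: "OneOp" runs a single clamp (Fuse = false), while "TwoOps" is
+// expected to chain a second clamp onto the same sketch (Fuse = true); see ClampFixture.h for the details.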
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionClampOpFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make(
+ "ClampAttributes", {ClampAttributes().min_val(0.1f).max_val(0.6f)})),
+ framework::dataset::make("Fuse", {false})),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp,
+ DynamicFusionClampOpFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::Small5dShapes(),
+ framework::dataset::make(
+ "ClampAttributes", {ClampAttributes().min_val(0.1f).max_val(0.6f)})),
+ framework::dataset::make("Fuse", {false})),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ ARM_COMPUTE_TEST_INFO("Currently 5D+ tensors are unsupported for this operation.");
+ framework::ARM_COMPUTE_PRINT_INFO();
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps,
+ DynamicFusionClampOpFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make(
+ "ClampAttributes", {ClampAttributes().min_val(0.2f).max_val(0.4f)})),
+ framework::dataset::make("Fuse", {true})),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionClampOpFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make(
+ "ClampAttributes", {ClampAttributes().min_val(0.3f).max_val(0.7f)})),
+ framework::dataset::make("Fuse", {false})),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp,
+ DynamicFusionClampOpFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::Small5dShapes(),
+ framework::dataset::make(
+ "ClampAttributes", {ClampAttributes().min_val(0.3f).max_val(0.7f)})),
+ framework::dataset::make("Fuse", {false})),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ ARM_COMPUTE_TEST_INFO("Currently 5D+ tensors are unsupported for this operation.");
+ framework::ARM_COMPUTE_PRINT_INFO();
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps,
+ DynamicFusionClampOpFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make(
+ "ClampAttributes", {ClampAttributes().min_val(0.1f).max_val(0.9f)})),
+ framework::dataset::make("Fuse", {true})),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance);
+}
+
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // CLAMP
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp
new file mode 100644
index 0000000000..2f8c639cea
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp
@@ -0,0 +1,474 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/DepthwiseConvolutionLayerDataset.h"
+#include "tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+const auto depth_multipliers = framework::dataset::make("DepthMultiplier", {1U, 4U});
+const auto large_depth_multipliers = framework::dataset::make("DepthMultiplier", {1, 2, 5, 8});
+
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(DEPTHWISE_CONV2D)
+
+RelativeTolerance<float> tolerance_f32(
+ 0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+RelativeTolerance<half_float::half> tolerance_f16(half_float::half(
+ 0.1)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr float tolerance_num = 0.02f; /**< Tolerance number: maximum allowed ratio of mismatching elements */
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip( // Explanations of failing tests
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching data type input/weights
+ TensorInfo(TensorShape(3U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching input feature maps
+ TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching depth multiplier
+ TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid biases size
+ TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid biases dimensions
+ TensorInfo(TensorShape(8U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // dilation < 1
+ TensorInfo(TensorShape(8U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM8, DataLayout::NHWC), // Unsupported data type
+ TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), // Unsupported data type
+ TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM16, DataLayout::NHWC), // Unsupported data type
+ TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM8, DataLayout::NHWC), // Unsupported data type
+ TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM8_PER_CHANNEL, DataLayout::NHWC), // Unsupported data type
+ TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM16, DataLayout::NHWC), // Unsupported data type
+ TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U8, DataLayout::NHWC), // Unsupported data type
+ TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S8, DataLayout::NHWC), // Unsupported data type
+ TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U16, DataLayout::NHWC), // Unsupported data type
+ TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S16, DataLayout::NHWC), // Unsupported data type
+ TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U32, DataLayout::NHWC), // Unsupported data type
+ TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S32, DataLayout::NHWC), // Unsupported data type
+ TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::F32, DataLayout::NCHW), // Unsupported data layout
+ TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), // weight dimension > 3
+ TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ }),
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(16U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(16U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QASYMM8, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QSYMM16, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QSYMM8, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QSYMM8_PER_CHANNEL, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QASYMM16, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::U8, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::S8, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::U16, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::S16, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::U32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U, 5U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U, 4U, 3U), 1, DataType::F32, DataLayout::NHWC),
+ })),
+ framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 2U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(16U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(16U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NCHW),
+ TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC),
+ })),
+ framework::dataset::make("Padding", { Padding2D(0, 0, 0, 0),
+ Padding2D(0, 0, 0, 0),
+ Padding2D(0, 0, 0, 0),
+ Padding2D(0, 0, 0, 0),
+ Padding2D(0, 0, 0, 0),
+ Padding2D(0, 0, 0, 0),
+ Padding2D(0, 0, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(1, 1, 0, 0),
+ Padding2D(2, 1, 2, 1),
+ Padding2D(2, 1, 2, 1),
+ Padding2D(2, 1, 2, 1),
+ })),
+ framework::dataset::make("Stride", { Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(1, 1),
+ Size2D(2, 3),
+ Size2D(2, 3),
+ })),
+ framework::dataset::make("DepthMultiplier", { 1,
+ 1,
+ 3,
+ 1,
+ 1,
+ 2,
+ 2,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ 3,
+ })),
+ framework::dataset::make("Dilation", { Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(0U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(1U, 1U),
+ Size2D(2U, 3U),
+ })),
+ framework::dataset::make("Expected", { false, false, false, false, false, false, true, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, true, false, true, true, true })),
+ input_info, weights_info, biases_info, padding, stride, depth_multiplier, dilation, expected)
+{
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context = GpuWorkloadContext{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &context };
+
+ const ITensorInfo* sketch_input_info = context.create_tensor_info(input_info);
+ const ITensorInfo* sketch_weights_info = context.create_tensor_info(weights_info);
+ const ITensorInfo* sketch_biases_info = context.create_tensor_info(biases_info);
+
+ DepthwiseConv2dAttributes attributes {};
+ attributes.pad(padding)
+ .stride(stride)
+ .dilation(dilation)
+ .depth_multiplier(depth_multiplier);
+
+ const Status status = GpuDepthwiseConv2d::validate_op(sketch, sketch_input_info, sketch_weights_info, sketch_biases_info, attributes);
+ const bool res = bool(status);
+ ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using DynamicFusionGpuDepthwiseConv2dFixture =
+ DynamicFusionGpuDepthwiseConv2dValidationFixture<CLTensor, CLAccessor, GpuDepthwiseConv2d, T>;
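+// The fixture (DepthwiseConv2dFixture.h) roughly mirrors the flow of the Validate test above: it creates a
+// sketch, adds a single GpuDepthwiseConv2d, runs the resulting workload and compares the output against the
+// reference depthwise convolution within the tolerances defined above.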
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuDepthwiseConv2dFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionGpuDepthwiseConv2dFixture<half>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+ large_depth_multipliers),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuDepthwiseConv2dFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+ depth_multipliers),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionGpuDepthwiseConv2dFixture<half>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+ large_depth_multipliers),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // W3x3
+
+TEST_SUITE(Generic)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuDepthwiseConv2dFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionGpuDepthwiseConv2dFixture<half>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+ large_depth_multipliers),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuDepthwiseConv2dFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+ depth_multipliers),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionGpuDepthwiseConv2dFixture<half>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
+ large_depth_multipliers),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // Generic
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuDepthwiseConv2dFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionGpuDepthwiseConv2dFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+ large_depth_multipliers),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE(Dilation)
+
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuDepthwiseConv2dFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+ depth_multipliers),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionGpuDepthwiseConv2dFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+ large_depth_multipliers),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // W3x3
+
+TEST_SUITE(Generic)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuDepthwiseConv2dFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionGpuDepthwiseConv2dFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+ large_depth_multipliers),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeKernelSize,
+ DynamicFusionGpuDepthwiseConv2dFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset(),
+ framework::dataset::make("DepthMultiplier", {1})),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuDepthwiseConv2dFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+ depth_multipliers),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionGpuDepthwiseConv2dFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+ large_depth_multipliers),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // Generic
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // DEPTHWISE_CONV2D
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp
new file mode 100644
index 0000000000..b843764786
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "tests/AssetsLibrary.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/SmallConvolutionLayerDataset.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h"
+#include "tests/validation/reference/ConvolutionLayer.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Tolerances from tests/validation/CL/DirectConvolutionLayer.cpp
+ */
+RelativeTolerance<float> tolerance_f32(
+ 0.05f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+RelativeTolerance<half_float::half> tolerance_f16(half_float::half(
+ 0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr float abs_tolerance_f32(0.0001f); /**< Absolute tolerance for FP32 tests*/
+constexpr float tolerance_num = 0.07f;      /**< Tolerance number: maximum allowed ratio of mismatching elements */
+} // namespace
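+// Note on the fixtures below: the FP32 cases pass abs_tolerance_f32 as an additional absolute bound, used
+// when the relative check fails on values close to zero, while the FP16 cases rely on tolerance_num (the
+// allowed ratio of mismatching elements) instead.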
+
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+/** Synced with tests/validation/CL/ConvolutionLayer.cpp
+ *
+ * Difference | Why the difference
+ * f32 tolerance here is smaller | To use the same tolerance as that of DirectConv2d; lowering tolerance is safe
+ * No quantized tests | Not supported yet
+ * No grouped CNN tests | Not supported yet
+ * No mixed layout tests | Not needed; only NHWC is supported
+ * No activation | Not needed in fusion
+ * No ValidateConvolutionMethod | Only a single method (direct conv2d) is supported
+ * No ReshapeWeights = true tests | Not applicable yet. This parameter only concerns gemm-based conv2d
+ * No RunSmallWithPadding tests | Padding is removed
+ *
+ */
+TEST_SUITE(CONV2D)
+
+template <typename T>
+using DynamicFusionGpuConv2dFixture = DynamicFusionGpuConv2dValidationFixture<CLTensor, CLAccessor, GpuConv2d, T>;
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuConv2dFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})),
+ framework::dataset::make("QuantizationInfo", QuantizationInfo())))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuConv2dFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", {DataLayout::NHWC})),
+ framework::dataset::make("QuantizationInfo", QuantizationInfo())))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END() // FP16
+
+// Tests for specific conv2d methods
+/** Synced with tests/validation/CL/DirectConvolutionLayer.cpp
+ *
+ * Difference | Why the difference
+ * No quantized tests | Not supported yet
+ * No Invalid output size test | Not applicable. Output is removed from the interface
+ * No mixed layout/NCHW tests | Not needed; only NHWC is supported
+ * No activation tests | Not needed in fusion
+ */
+TEST_SUITE(DIRECT_CONV2D)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid: Mismatching data type input/weights
+ TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid: Mismatching input feature maps
+ TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid weights dimensions
+ TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Unsupported biases size
+ TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Unsupported biases dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, DataLayout::NCHW), // Unsupported data layout: NCHW
+ TensorInfo(TensorShape(2U, 32U, 16U), 1, DataType::QASYMM8, DataLayout::NHWC), // Unsupported data type: quantized
+ TensorInfo(TensorShape(2U, 32U, 16U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Arbitrary weight sizes for NHWC are supported
+                                                 TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Non-square weights dimensions for NHWC are supported
+ TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Strides > 2 for any kernel sizes for NHWC are supported
+ }),
+ framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 3U, 3U, 4U, 3U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(2U, 1U, 1U, 4U), 1, DataType::QASYMM8, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 1U, 1U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 13U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 5U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC),
+ })),
+ framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U, 2U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(25U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(4U), 1, DataType::QASYMM8, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+ })),
+ framework::dataset::make("Conv2dAttributes", {
+ Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}),
+ Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}),
+ Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}),
+ Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}),
+ Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}),
+ Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}),
+ Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}),
+ Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}),
+ Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}),
+ Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}),
+ Conv2dAttributes().stride({3, 3}).pad({0, 0, 0, 0}),
+ })),
+ framework::dataset::make("Expected", { false, false, false, false, false, false, false, true, true, true, true })),
+ input_info, weights_info, biases_info, conv2d_attrs, expected)
+{
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &context };
+
+ const ITensorInfo* sketch_input_info = context.create_tensor_info(input_info);
+ const ITensorInfo* sketch_weights_info = context.create_tensor_info(weights_info);
+ const ITensorInfo* sketch_biases_info = context.create_tensor_info(biases_info);
+ bool is_valid = bool(GpuConv2d::validate_op(sketch, sketch_input_info, sketch_weights_info, sketch_biases_info, conv2d_attrs));
+ ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+template <typename T>
+using DynamicFusionGpuDirectConv2dFixture = DynamicFusionDirectConv2dValidationFixture<CLTensor, CLAccessor, GpuConv2d, T>;
+
+TEST_SUITE(FP16)
+/// TODO: COMPMID-6877: Once the issue in Conv2d is resolved, re-enable these
+FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDirectConv2dFixture<half>, framework::DatasetMode::DISABLED,
+ combine(combine(combine(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+ TensorShape(19U, 5U, 16U, 4U),
+ TensorShape(13U, 5U, 17U, 2U),
+ TensorShape(32U, 37U, 13U) } ),
+ framework::dataset::make("StrideX", { 1, 3, 1, 1 })),
+ framework::dataset::make("StrideY", { 1, 3, 2, 1 })),
+ framework::dataset::make("PadX", { 1, 3, 0, 4 })),
+ framework::dataset::make("PadY", { 1, 3, 0, 4 })),
+ framework::dataset::make("KernelSize", { 3, 8, 1, 9 })),
+ framework::dataset::make("NumKernels", { 17, 3, 1, 19 })),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDirectConv2dFixture<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ),
+ framework::dataset::make("StrideX", { 1 })),
+ framework::dataset::make("StrideY", { 1 })),
+ framework::dataset::make("PadX", { 1 })),
+ framework::dataset::make("PadY", { 1 })),
+ framework::dataset::make("KernelSize", { 9 })),
+ framework::dataset::make("NumKernels", { 3 })),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+/// TODO: COMPMID-6877: Once the issue in Conv2d is resolved, re-enable these
+FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDirectConv2dFixture<float>, framework::DatasetMode::DISABLED,
+ combine(combine(combine(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+ TensorShape(19U, 5U, 16U, 4U),
+ TensorShape(13U, 5U, 17U, 2U),
+ TensorShape(32U, 37U, 13U) } ),
+ framework::dataset::make("StrideX", { 1, 3, 1, 1 })),
+ framework::dataset::make("StrideY", { 1, 3, 2, 1 })),
+ framework::dataset::make("PadX", { 1, 3, 0, 4 })),
+ framework::dataset::make("PadY", { 1, 3, 0, 4 })),
+ framework::dataset::make("KernelSize", { 3, 8, 1, 9 })),
+ framework::dataset::make("NumKernels", { 17, 3, 1, 19 })),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.0, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDirectConv2dFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ),
+ framework::dataset::make("StrideX", { 1 })),
+ framework::dataset::make("StrideY", { 1 })),
+ framework::dataset::make("PadX", { 1 })),
+ framework::dataset::make("PadY", { 1 })),
+ framework::dataset::make("KernelSize", { 9 })),
+ framework::dataset::make("NumKernels", { 3 })),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.0, abs_tolerance_f32);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // DIRECT_CONV2D
+TEST_SUITE_END() // CONV2D
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp b/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp
new file mode 100644
index 0000000000..82d66ca6ce
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp
@@ -0,0 +1,335 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "tests/AssetsLibrary.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/LargeMatMulDataset.h"
+#include "tests/datasets/MatMulDataset.h"
+#include "tests/datasets/SmallMatMulDataset.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h"
+#include "tests/validation/reference/GEMM.h"
+#include "tests/validation/reference/Permute.h"
+#include "tests/validation/Validation.h"
+
+#include <tuple>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(
+    0.001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+constexpr float abs_tolerance_f32(
+ 0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for floating point data types in case using relative tolerance fails because of small values */
+constexpr float abs_tolerance_f16(
+ 0.001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16 data types in case using relative tolerance fails because of small values */
+RelativeTolerance<half_float::half> tolerance_f16(half(
+    0.02)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+} // namespace
+
+/** M0 values to test - precommit */
+const auto m0_values_lhs_nt_precommit = framework::dataset::make("M0", {1, 2, 3});
+
+/** N0 values to test - precommit */
+const auto n0_values_rhs_t_precommit = framework::dataset::make("N0", {1, 2, 4});
+
+/** K0 values to test - precommit */
+const auto k0_values_rhs_t_precommit = framework::dataset::make("K0", {1, 2, 4});
+
+/** M0 values to test - nightly */
+const auto m0_values_lhs_nt_nightly = framework::dataset::make("M0", {1, 2, 3, 4});
+
+/** N0 values to test - nightly */
+const auto n0_values_rhs_t_nightly = framework::dataset::make("N0", {1, 2, 3, 4, 8});
+
+/** K0 values to test - nightly */
+const auto k0_values_rhs_t_nightly = framework::dataset::make("K0", {1, 2, 3, 4, 8});
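+// The block sizes above are picked from the values accepted by the kernel (see the SupportedBlockSizes test
+// below): M0 > 0 and N0/K0 restricted to {1, 2, 3, 4, 8, 16}; the nightly sets simply cover more of them.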
+
+class DFMatMulDataset final : public datasets::MatMulDataset
+{
+public:
+ DFMatMulDataset()
+ {
+ // LHS = [K, M], RHS = [N, K], DST = [N, M]
+ add_config(TensorShape(1U, 1U), TensorShape(1U, 1U), TensorShape(1U, 1U));
+ add_config(TensorShape(1U, 2U), TensorShape(2U, 1U), TensorShape(2U, 2U));
+ add_config(TensorShape(9U, 6U), TensorShape(5U, 9U), TensorShape(5U, 6U));
+ add_config(TensorShape(32U, 37U), TensorShape(17U, 32U), TensorShape(17U, 37U));
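+        // For example the last entry reads: LHS 32x37 -> [K = 32, M = 37], RHS 17x32 -> [N = 17, K = 32],
+        // giving DST 17x37 -> [N = 17, M = 37].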
+ }
+};
+
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+
+TEST_SUITE(MatMul)
+
+TEST_SUITE(Validate)
+TEST_CASE(SupportedBlockSizes, framework::DatasetMode::ALL)
+{
+ using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>;
+
+ const std::vector<MatMulConfigurationPair> supported_block_sizes = {
+ // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false)
+
+ // Lhs not-transposed, Rhs transposed
+ {MatMulKernelInfo(false, true, 0, 1, 1), false}, // M0 should be > 0
+ {MatMulKernelInfo(false, true, 3, 11, 1), false}, // N0 not in {1, 2, 3, 4, 8, 16}
+ {MatMulKernelInfo(false, true, 3, 7, 1), false}, // N0 not in {1, 2, 3, 4, 8, 16}
+ {MatMulKernelInfo(false, true, 3, 3, 12), false}, // K0 not in {1, 2, 3, 4, 8, 16}
+ {MatMulKernelInfo(false, true, 3, 3, 6), false}, // K0 not in {1, 2, 3, 4, 8, 16}
+ {MatMulKernelInfo(false, true, 5, 1, 2), true}, {MatMulKernelInfo(false, true, 3, 3, 3), true},
+ {MatMulKernelInfo(false, true, 2, 4, 8), true},
+
+ };
+
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal
+ // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here,
+ // not the shapes themselves.
+ const ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(TensorShape(100U, 100U), 1, DataType::F32));
+ const ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(TensorShape(100U, 100U), 1, DataType::F32));
+
+ for (auto &pair : supported_block_sizes)
+ {
+ MatMulAttributes matmul_attr{};
+ matmul_attr.adj_lhs(pair.first.adj_lhs);
+ matmul_attr.adj_rhs(pair.first.adj_rhs);
+
+ GpuMatMulSettings matmul_settings{};
+ matmul_settings.m0(pair.first.m0);
+ matmul_settings.n0(pair.first.n0);
+ matmul_settings.k0(pair.first.k0);
+
+ Status status = GpuMatMul::validate_op(sketch, lhs_info, rhs_info, matmul_attr, matmul_settings);
+ ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS);
+ }
+}
+
+TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL)
+{
+ // Create a sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
+ using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, bool>;
+ const std::vector<ShapeConfigurationTuple> shape_configurations = {
+ {TensorShape(5U, 1U), TensorShape(3U, 5U), true},
+ {TensorShape(10U, 12U), TensorShape(3U, 10U), true},
+ {TensorShape(8U, 4U), TensorShape(2U, 8U), true},
+ {TensorShape(8U, 4U), TensorShape(2U, 5U), false}, // Mismatch in the K dimension
+ {TensorShape(5U, 0U), TensorShape(2U, 5U), false}, // Invalid dimension
+ {TensorShape(5U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), true},
+ {TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), false}, // no batch broadcasting
+ {TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U),
+ false}, // mismatch in batch dimension
+ };
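+    // Shapes above follow the Nt/Nt convention (lhs = [K, M], rhs = [N, K]); the loops below permute the
+    // rhs so that the lhs-NT / rhs-T configuration exercised by the fixtures in this file is validated.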
+
+ for (auto &tuple : shape_configurations)
+ {
+ const bool expected = std::get<2>(tuple);
+
+ for (bool adj_lhs : {false})
+ {
+ for (bool adj_rhs : {true})
+ {
+ TensorShape lhs_shape = std::get<0>(tuple);
+ TensorShape rhs_shape = std::get<1>(tuple);
+
+ if (adj_lhs)
+ {
+ permute(lhs_shape, PermutationVector(1U, 0U));
+ }
+
+ if (adj_rhs)
+ {
+ permute(rhs_shape, PermutationVector(1U, 0U));
+ }
+
+ const ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(lhs_shape, 1, DataType::F32));
+ const ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(rhs_shape, 1, DataType::F32));
+
+ MatMulAttributes matmul_attr{};
+ matmul_attr.adj_lhs(adj_lhs);
+ matmul_attr.adj_rhs(adj_rhs);
+
+ GpuMatMulSettings matmul_settings{};
+ matmul_settings.m0(1);
+ matmul_settings.n0(1);
+ matmul_settings.k0(1);
+
+ Status status = GpuMatMul::validate_op(sketch, lhs_info, rhs_info, matmul_attr, matmul_settings);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ }
+ }
+ }
+}
+
+TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL)
+{
+    // All combinations below are validated with (adj_lhs, adj_rhs) = (false, false); only the data type support is exercised here
+ using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, bool>;
+ const std::vector<DataTypeConfigurationTuple> data_type_configurations = {
+ {DataType::F32, DataType::F32, DataType::F32, true},
+ {DataType::F16, DataType::F16, DataType::F16, true},
+ {DataType::F16, DataType::F32, DataType::F32, false}, // no mixed precision
+ {DataType::F64, DataType::F64, DataType::F64, false}, // no double precision
+ {DataType::QASYMM8, DataType::QASYMM8, DataType::QASYMM8, false}, // no quantized types
+ {DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, false}, // no quantized types
+ {DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL,
+ false}, // no quantized types
+ {DataType::QASYMM16, DataType::QASYMM16, DataType::QASYMM16, false}, // no quantized types
+ {DataType::QSYMM16, DataType::QSYMM16, DataType::QSYMM16, false}, // no quantized types
+ {DataType::QSYMM8, DataType::QSYMM8, DataType::QSYMM8, false}, // no quantized types
+ {DataType::S64, DataType::S64, DataType::S64, false}, // no integral types
+ {DataType::S32, DataType::S32, DataType::S32, false}, // no integral types
+ {DataType::S16, DataType::S16, DataType::S16, false}, // no integral types
+ {DataType::S8, DataType::S8, DataType::S8, false}, // no integral types
+ {DataType::U64, DataType::U64, DataType::U64, false}, // no integral types
+ {DataType::U32, DataType::U32, DataType::U32, false}, // no integral types
+ {DataType::U16, DataType::U16, DataType::U16, false}, // no integral types
+ {DataType::U8, DataType::U8, DataType::U8, false}, // no integral types
+ };
+ // Create a sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ const TensorShape shape = TensorShape(10U, 10U);
+ MatMulAttributes matmul_attr{};
+ matmul_attr.adj_lhs(false);
+ matmul_attr.adj_rhs(false);
+ GpuMatMulSettings matmul_settings{};
+ matmul_settings.m0(1);
+ matmul_settings.n0(1);
+ matmul_settings.k0(1);
+
+ for (auto &tuple : data_type_configurations)
+ {
+ const bool expected = std::get<3>(tuple);
+
+ const ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape, 1, std::get<0>(tuple)));
+ const ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape, 1, std::get<1>(tuple)));
+
+ Status status = GpuMatMul::validate_op(sketch, lhs_info, rhs_info, matmul_attr, matmul_settings);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+ }
+}
+
+TEST_SUITE_END() // Validate
+
+template <typename T>
+using DynamicFusionGpuMatmulFixture = DynamicFusionGpuMatMulValidationFixture<CLTensor, CLAccessor, GpuMatMul, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+
+FIXTURE_DATA_TEST_CASE(RunPrecommit,
+ DynamicFusionGpuMatmulFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(DFMatMulDataset(),
+ framework::dataset::make("TransposeA", {false}),
+ framework::dataset::make("TransposeB", {true}),
+ m0_values_lhs_nt_precommit,
+ n0_values_rhs_t_precommit,
+ k0_values_rhs_t_precommit,
+ framework::dataset::make("ExportRhsToCLImage", {false}),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunNightly,
+ DynamicFusionGpuMatmulFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(DFMatMulDataset(),
+ framework::dataset::make("TransposeA", {false}),
+ framework::dataset::make("TransposeB", {true}),
+ m0_values_lhs_nt_nightly,
+ n0_values_rhs_t_nightly,
+ k0_values_rhs_t_nightly,
+ framework::dataset::make("ExportRhsToCLImage", {false}),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+
+FIXTURE_DATA_TEST_CASE(RunPrecommit,
+ DynamicFusionGpuMatmulFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(DFMatMulDataset(),
+ framework::dataset::make("TransposeA", {false}),
+ framework::dataset::make("TransposeB", {true}),
+ m0_values_lhs_nt_precommit,
+ n0_values_rhs_t_precommit,
+ k0_values_rhs_t_precommit,
+ framework::dataset::make("ExportRhsToCLImage", {false}),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunNightly,
+ DynamicFusionGpuMatmulFixture<half>,
+ framework::DatasetMode::NIGHTLY,
+ combine(DFMatMulDataset(),
+ framework::dataset::make("TransposeA", {false}),
+ framework::dataset::make("TransposeB", {true}),
+ m0_values_lhs_nt_nightly,
+ n0_values_rhs_t_nightly,
+ k0_values_rhs_t_nightly,
+ framework::dataset::make("ExportRhsToCLImage", {false}),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // MatMul
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp b/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp
new file mode 100644
index 0000000000..af02ce3eaa
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/DynamicFusionDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/* Synced with tests/validation/CL/PixelWiseMultiplication.cpp from the standard interface.
+ *
+ * Difference | Why the difference
+ * No integer tests | Not supported yet
+ * No quantized tests | Not supported yet
+ * No convert policy tests | Not needed as convert policy is ignored for floating-point types
+ * No scale tests | Not supported yet
+ * No rounding modes tests | Not supported yet
+ * No in place tests | Not supported yet
+ * No activation tests | Not needed in dynamic fusion interface
+ *
+ */
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_f16(
+ 0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr AbsoluteTolerance<float> tolerance_f32(
+ 0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+} // namespace
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(MUL)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+ framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Unsupported data type U8
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Unsupported data type S8
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // Unsupported data type S16
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // Unsupported data type S32
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8_SIGNED
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
+ TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed
+ TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::F32), // Broadcast Z dimension is not allowed
+ TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed
+ }),
+ framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED),
+ TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs
+ TensorInfo(TensorShape(15U, 1U, 3U), 1, DataType::F32),
+ TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32),
+ })),
+ framework::dataset::make("Expected", { true, true, false, false, false, false, false, false, false, false, true, true, false, false, true })),
+ input1_info, input2_info, expected)
+{
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &context };
+
+ // Validate Elementwise Mul
+ auto lhs_info = context.create_tensor_info(input1_info);
+ auto rhs_info = context.create_tensor_info(input2_info);
+
+ bool res = bool(GpuMul::validate_op(sketch, lhs_info, rhs_info));
+ ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using DynamicFusionCLMulFixture = DynamicFusionMulOneOpValidationFixture<CLTensor, CLAccessor, GpuMul, T>;
+template <typename T>
+using DynamicFusionCLMulBroadcastFixture = DynamicFusionMulBroadcastValidationFixture<CLTensor, CLAccessor, GpuMul, T>;
+template <typename T>
+using DynamicFusionCLMulTwoOpsFixture = DynamicFusionMulTwoOpsValidationFixture<CLTensor, CLAccessor, GpuMul, T>;
+
+TEST_SUITE(F16)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionCLMulFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", {DataType::F16})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp,
+ DynamicFusionCLMulBroadcastFixture<half>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(datasets::TemporaryLimitedSmallShapesBroadcast(),
+ framework::dataset::make("DataType", {DataType::F16})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp,
+ DynamicFusionCLMulBroadcastFixture<half>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(datasets::TemporaryLimitedLargeShapesBroadcast(),
+ framework::dataset::make("DataType", {DataType::F16})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // F16
+
+TEST_SUITE(F32)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionCLMulFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeOneOp,
+ DynamicFusionCLMulFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp,
+ DynamicFusionCLMulBroadcastFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(datasets::TemporaryLimitedSmallShapesBroadcast(),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp,
+ DynamicFusionCLMulBroadcastFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(datasets::TemporaryLimitedLargeShapesBroadcast(),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps,
+ DynamicFusionCLMulTwoOpsFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes(),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})),
+ framework::dataset::make("FuseTwoOps", {true})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // F32
+
+TEST_SUITE_END() // MUL
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp
new file mode 100644
index 0000000000..be816b32b3
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/dynamic_fusion/PoolingLayerDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(POOL2D)
+
+constexpr AbsoluteTolerance<float> tolerance_f32(
+ 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */
+constexpr AbsoluteTolerance<float> tolerance_f16(
+ 0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
+
+const auto PoolingLayerDatasetFP =
+ combine(combine(combine(combine(framework::dataset::make("PoolingType", {PoolingType::MAX, PoolingType::AVG}),
+ framework::dataset::make("PoolingSize", {Size2D(2, 2), Size2D(3, 3)})),
+ framework::dataset::make("Pad", {Padding2D()})),
+ framework::dataset::make("Stride", {Size2D(1, 1), Size2D(2, 1), Size2D(5, 7)})),
+ framework::dataset::make("ExcludePadding", {true}));
+
+template <typename T>
+using DynamicFusionGpuPool2dFixture = DynamicFusionGpuPool2dValidationFixture<CLTensor, CLAccessor, GpuPool2d, T>;
+
+template <typename T>
+using DFSpecialGpuPool2dFixture = DynamicFusionGpuPool2dSpecialValidationFixture<CLTensor, CLAccessor, GpuPool2d, T>;
+// *INDENT-OFF*
+// clang-format off
+
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::QASYMM8, DataLayout::NHWC), // Invalid parameters, unsupported pooling
+ TensorInfo(TensorShape(5U, 15U, 13U), 1, DataType::F32, DataLayout::NHWC), // Valid Non-rectangular Global Pooling
+ TensorInfo(TensorShape(5U, 13U, 13U), 1, DataType::QASYMM8, DataLayout::NHWC), // Invalid - Quantized not supported.
+ TensorInfo(TensorShape(5U, 13U, 13U), 1, DataType::F32, DataLayout::NHWC), // Valid global pooling
+ TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, DataLayout::NCHW), // Unsupported data layout
+ }),
+ framework::dataset::make("Pool2dAttributes", {
+ Pool2dAttributes().pool_type(PoolingType::L2).pool_size(Size2D(3,3)).pad(Padding2D(0,0,0,0)).stride(Size2D(1,1)),
+ Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(15U, 13U)),
+ Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(2,2)).pad(Padding2D()).stride(Size2D(1,1)),
+ Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(13U,13U)),
+ Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(13U,13U)),
+ })),
+ framework::dataset::make("Expected", { false, true, false, true, false })),
+ input_info, pool2d_attr, expected)
+{
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &context };
+
+ // Declare GpuPool2d settings
+    const GpuPool2dSettings settings{};
+
+ // Validate Pool2d Configuration
+ auto src_info = context.create_tensor_info(input_info);
+ bool res = bool(GpuPool2d::validate_op(sketch, src_info, pool2d_attr, settings));
+ ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS);
+}
+
+// clang-format on
+// *INDENT-ON*
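+
+// Summarising the Validate cases above: floating-point NHWC inputs with MAX or AVG pooling are
+// accepted, while L2 pooling, quantized data types and NCHW layouts are rejected. A typical valid
+// attribute set, using the same builder pattern as above (illustrative only), would be:
+//
+//   Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(3, 3)).pad(Padding2D()).stride(Size2D(2, 2))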
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuPool2dFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(datasets::SmallNoneUnitShapes(), PoolingLayerDatasetFP),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionGpuPool2dFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(datasets::LargeShapes(), PoolingLayerDatasetFP),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunSpecial,
+ DFSpecialGpuPool2dFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(datasets::PoolingLayerDatasetSpecialDynamicFusion(),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE(GlobalPooling)
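+// "Global" pooling: the pooling window covers the full spatial extent of the input, e.g.
+// PoolingSize {27, 13} for the 27x13 inputs below, so each feature map reduces to a single value.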
+FIXTURE_DATA_TEST_CASE(
+ RunSmall,
+ DynamicFusionGpuPool2dFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(framework::dataset::make("InputShape",
+ {TensorShape(27U, 13U, 2U),
+ TensorShape(27U, 13U, 2U, 4U)}),
+ framework::dataset::make("PoolingType",
+ {PoolingType::AVG, PoolingType::MAX})),
+ framework::dataset::make("PoolingSize", {Size2D(27, 13)})),
+ framework::dataset::make("Pad", {Padding2D()})),
+ framework::dataset::make("Stride", {Size2D(1, 1)})),
+ framework::dataset::make("ExcludePadding", true)),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(
+ RunLarge,
+ DynamicFusionGpuPool2dFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(framework::dataset::make("InputShape",
+ {TensorShape(79U, 37U, 11U),
+ TensorShape(79U, 37U, 11U, 4U)}),
+ framework::dataset::make("PoolingType",
+ {PoolingType::AVG, PoolingType::MAX})),
+ framework::dataset::make("PoolingSize", {Size2D(79, 37)})),
+ framework::dataset::make("Pad", {Padding2D()})),
+ framework::dataset::make("Stride", {Size2D(1, 1)})),
+ framework::dataset::make("ExcludePadding", true)),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // GlobalPooling
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+TEST_SUITE(GlobalPooling)
+FIXTURE_DATA_TEST_CASE(
+ RunSmall,
+ DynamicFusionGpuPool2dFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(framework::dataset::make("InputShape",
+ {TensorShape(27U, 13U, 2U),
+ TensorShape(27U, 13U, 2U, 4U)}),
+ framework::dataset::make("PoolingType",
+ {PoolingType::AVG, PoolingType::MAX})),
+ framework::dataset::make("PoolingSize", {Size2D(27, 13)})),
+ framework::dataset::make("Pad", {Padding2D()})),
+ framework::dataset::make("Stride", {Size2D(1, 1)})),
+ framework::dataset::make("ExcludePadding", true)),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(
+ RunLarge,
+ DynamicFusionGpuPool2dFixture<half>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(framework::dataset::make("InputShape",
+ {TensorShape(79U, 37U, 11U),
+ TensorShape(79U, 37U, 11U, 4U)}),
+ framework::dataset::make("PoolingType",
+ {PoolingType::AVG, PoolingType::MAX})),
+ framework::dataset::make("PoolingSize", {Size2D(79, 37)})),
+ framework::dataset::make("Pad", {Padding2D()})),
+ framework::dataset::make("Stride", {Size2D(1, 1)})),
+ framework::dataset::make("ExcludePadding", true)),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // GlobalPooling
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // POOL2D
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp b/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp
new file mode 100644
index 0000000000..d46754ccca
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ReshapeLayerDataset.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(RESHAPE)
+
+DATA_TEST_CASE(Validate,
+ framework::DatasetMode::DISABLED,
+ zip(zip(framework::dataset::make(
+ "InputInfo",
+ {
+ TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),
+ TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32) /*mismatching dimensions*/,
+ }),
+ framework::dataset::make("OutputShape",
+ {
+ TensorShape(9U, 5U, 21U),
+ TensorShape(8U, 24U, 4U),
+ TensorShape(192U, 192U),
+ })),
+ framework::dataset::make("Expected", {true, true, false})),
+ input_info,
+ output_shape,
+ expected)
+{
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Create sketch tensors
+ TensorShape input_shape = input_info.tensor_shape();
+ ARM_COMPUTE_UNUSED(input_shape);
+ ITensorInfo *src_info = context.create_tensor_info(input_info);
+
+ ReshapeAttributes attributes;
+ attributes.shape(output_shape);
+ Status status = GpuReshape::validate_op(sketch, src_info, attributes);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
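+
+// The Expected values above follow from element-count preservation: a reshape is valid only if the
+// total number of elements is unchanged, e.g. 9*5*7*3 = 945 = 9*5*21 and 8*4*6*4 = 768 = 8*24*4,
+// whereas the mismatching case would need 192*192 = 36864 elements instead of 768.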
+
+template <typename T>
+using DynamicFusionGpuReshapeLayerFixture =
+ DynamicFusionGpuReshapeLayerValidationFixture<CLTensor, CLAccessor, GpuReshape, T>;
+
+TEST_SUITE(F32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuReshapeLayerFixture<float>,
+ framework::DatasetMode::DISABLED,
+ combine(datasets::SmallReshapeLayerDataset(),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // F32
+
+TEST_SUITE(F16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuReshapeLayerFixture<half>,
+ framework::DatasetMode::DISABLED,
+ combine(datasets::SmallReshapeLayerDataset(),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // F16
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuReshapeLayerFixture<uint8_t>,
+ framework::DatasetMode::DISABLED,
+ combine(datasets::SmallReshapeLayerDataset(),
+ framework::dataset::make("DataType", DataType::U8)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S8)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuReshapeLayerFixture<int8_t>,
+ framework::DatasetMode::DISABLED,
+ combine(datasets::SmallReshapeLayerDataset(),
+ framework::dataset::make("DataType", DataType::S8)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // S8
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionGpuReshapeLayerFixture<int16_t>,
+ framework::DatasetMode::DISABLED,
+ combine(datasets::SmallReshapeLayerDataset(),
+ framework::dataset::make("DataType", DataType::S16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END() // RESHAPE
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp b/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp
new file mode 100644
index 0000000000..a6bcf4ae26
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp
@@ -0,0 +1,359 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ScaleValidationDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+using datasets::ScaleAlignCornersSamplingPolicySet;
+using datasets::ScaleInterpolationPolicySet;
+using datasets::ScaleSamplingPolicySet;
+using datasets::ScaleShapesBaseDataSet;
+
+/** We use a vector size of 16 bytes, since the maximum size of
+ * a vector used by @ref CLScaleKernel is currently 16 bytes (float4).
+ */
+constexpr uint32_t vector_byte = 16;
+
+template <typename T>
+constexpr uint32_t num_elements_per_vector()
+{
+ return vector_byte / sizeof(T);
+}
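+
+// With vector_byte = 16 this yields 16 / sizeof(float) = 4 elements per vector for F32 and
+// 16 / sizeof(half) = 8 for F16. The checks below are illustrative sanity checks only.
+static_assert(num_elements_per_vector<float>() == 4, "A 16-byte vector is expected to hold 4 floats");
+static_assert(num_elements_per_vector<half>() == 8, "A 16-byte vector is expected to hold 8 halves");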
+
+/** Quantization information data set */
+const auto QuantizationInfoSet = framework::dataset::make("QuantizationInfo",
+ {
+ QuantizationInfo(0.5f, -1),
+ });
+
+/** Tolerance */
+constexpr float tolerance_f32_absolute(0.001f);
+
+RelativeTolerance<float> tolerance_f32(0.05);
+constexpr float abs_tolerance_f16(0.1f);
+RelativeTolerance<half> tolerance_f16(half(0.1));
+
+constexpr float tolerance_num_f32(0.01f);
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(RESIZE)
+
+TEST_SUITE(Validate)
+
+const auto default_input_shape = TensorShape{2, 3, 3, 2};
+const auto default_output_shape = TensorShape{4, 6, 3, 2};
+
+constexpr auto default_data_type = DataType::U8;
+constexpr auto default_data_layout = DataLayout::NHWC;
+
+TEST_CASE(NullPtr, framework::DatasetMode::ALL)
+{
+ const TensorInfo input_info = TensorInfo{default_input_shape, 1, default_data_type, default_data_layout};
+ const TensorInfo output_info = TensorInfo{default_output_shape, 1, default_data_type, default_data_layout};
+
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // nullptr is given as input
+ Status status = GpuResize::validate_op(sketch, nullptr, ResizeAttributes());
+ ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(SupportDataType, framework::DatasetMode::ALL)
+{
+ const std::map<DataType, bool> supported_data_types =
+ {
+ { DataType::U8, false },
+ { DataType::S8, false },
+ { DataType::QSYMM8, false },
+ { DataType::QASYMM8, false },
+ { DataType::QASYMM8_SIGNED, false },
+ { DataType::QSYMM8_PER_CHANNEL, false },
+ { DataType::U16, false },
+ { DataType::S16, false },
+ { DataType::QSYMM16, false },
+ { DataType::QASYMM16, false },
+ { DataType::U32, false },
+ { DataType::S32, false },
+ { DataType::U64, false },
+ { DataType::S64, false },
+ { DataType::BFLOAT16, false },
+ { DataType::F16, true },
+ { DataType::F32, true },
+ { DataType::F64, false },
+ { DataType::SIZET, false },
+ };
+
+ for (auto &kv : supported_data_types)
+ {
+ const TensorInfo input_info = TensorInfo{default_input_shape, 1, kv.first, default_data_layout};
+
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ const ITensorInfo *sketch_input_info = context.create_tensor_info(input_info);
+
+ ResizeAttributes attributes;
+    attributes.output_width(default_output_shape[0]); // the exact output size is not important as long as it is not empty
+ attributes.output_height(default_output_shape[1]);
+
+ Status status = GpuResize::validate_op(sketch, sketch_input_info, attributes);
+ ARM_COMPUTE_EXPECT(bool(status) == kv.second, framework::LogLevel::ERRORS);
+ }
+}
+
+TEST_CASE(MismatchingDataType, framework::DatasetMode::ALL)
+{
+ constexpr DataType non_default_data_type = DataType::F32;
+
+ const TensorInfo input_info = TensorInfo{default_input_shape, 1, default_data_type, default_data_layout};
+ const TensorInfo output_info = TensorInfo{default_output_shape, 1, non_default_data_type, default_data_layout};
+
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ const ITensorInfo *sketch_input_info = context.create_tensor_info(input_info);
+
+ Status status = GpuResize::validate_op(sketch, sketch_input_info, ResizeAttributes());
+ ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(AlignedCornerNotSupported, framework::DatasetMode::ALL)
+{
+ // Aligned corners require sampling policy to be TOP_LEFT.
+ constexpr InterpolationPolicy interpolation_policy = InterpolationPolicy::BILINEAR;
+ constexpr bool align_corners = true;
+ constexpr SamplingPolicy sampling_policy = SamplingPolicy::CENTER;
+
+ const TensorInfo input_info = TensorInfo{default_input_shape, 1, default_data_type, default_data_layout};
+ const TensorInfo output_info = TensorInfo{default_output_shape, 1, default_data_type, default_data_layout};
+
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ const ITensorInfo *sketch_input_info = context.create_tensor_info(input_info);
+
+ ResizeAttributes attributes{};
+ attributes.interpolation_policy(interpolation_policy).sampling_policy(sampling_policy).align_corners(align_corners);
+
+ Status status = GpuResize::validate_op(sketch, sketch_input_info, attributes);
+ ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
+}
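+
+// For reference, the supported counterpart of the case above would pair align_corners(true) with
+// SamplingPolicy::TOP_LEFT (illustrative only):
+//
+//   attributes.interpolation_policy(InterpolationPolicy::BILINEAR)
+//             .sampling_policy(SamplingPolicy::TOP_LEFT)
+//             .align_corners(true);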
+
+TEST_CASE(UnsupportedInterpolationPolicy, framework::DatasetMode::ALL)
+{
+ const TensorInfo input_info = TensorInfo{TensorShape(28U, 33U, 2U), 1, DataType::F32, default_data_layout};
+ const TensorInfo output_info = TensorInfo{TensorShape(26U, 21U, 2U), 1, DataType::F32, default_data_layout};
+ constexpr auto interpolation_policy = InterpolationPolicy::AREA;
+
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ const ITensorInfo *sketch_input_info = context.create_tensor_info(input_info);
+
+ ResizeAttributes attributes{};
+ attributes.interpolation_policy(interpolation_policy);
+
+ Status status = GpuResize::validate_op(sketch, sketch_input_info, attributes);
+ ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(UnsupportedLayout, framework::DatasetMode::ALL)
+{
+ const TensorInfo input_info = TensorInfo{default_input_shape, 1, default_data_type, DataLayout::NCHW};
+ const TensorInfo output_info = TensorInfo{default_output_shape, 1, default_data_type, DataLayout::NCHW};
+ constexpr auto interpolation_policy = InterpolationPolicy::BILINEAR;
+
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ const ITensorInfo *sketch_input_info = context.create_tensor_info(input_info);
+
+ ResizeAttributes attributes{};
+ attributes.interpolation_policy(interpolation_policy);
+
+ Status status = GpuResize::validate_op(sketch, sketch_input_info, attributes);
+ ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
+}
+
+TEST_SUITE_END() // Validate
+
+template <typename T>
+using DynamicFusionResizeFixture = DynamicFusionResizeValidationFixture<CLTensor, CLAccessor, GpuResize, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+
+const auto f32_shape = combine((SCALE_PRECOMMIT_SHAPE_DATASET(num_elements_per_vector<float>())),
+ framework::dataset::make("DataType", DataType::F32));
+
+FIXTURE_DATA_TEST_CASE(Run,
+ DynamicFusionResizeFixture<float>,
+ framework::DatasetMode::ALL,
+ ASSEMBLE_DATASET_DYNAMIC_FUSION(f32_shape, ScaleSamplingPolicySet))
+{
+    // Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ const ValidRegion valid_region =
+ calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false);
+
+ // Validate output
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute);
+}
+
+FIXTURE_DATA_TEST_CASE(RunAlignCorners,
+ DynamicFusionResizeFixture<float>,
+ framework::DatasetMode::ALL,
+ ASSEMBLE_DATASET_DYNAMIC_FUSION(f32_shape, ScaleAlignCornersSamplingPolicySet))
+{
+    // Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ const ValidRegion valid_region =
+ calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false);
+
+ // Validate output
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute);
+}
+const auto f32_nightly_shape = combine((SCALE_NIGHTLY_SHAPE_DATASET(num_elements_per_vector<float>())),
+ framework::dataset::make("DataType", DataType::F32));
+FIXTURE_DATA_TEST_CASE(RunNightly,
+ DynamicFusionResizeFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ ASSEMBLE_DATASET_DYNAMIC_FUSION(f32_nightly_shape, ScaleSamplingPolicySet))
+{
+    // Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ const ValidRegion valid_region =
+ calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false);
+
+ // Validate output
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute);
+}
+FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners,
+ DynamicFusionResizeFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ ASSEMBLE_DATASET_DYNAMIC_FUSION(f32_nightly_shape, ScaleAlignCornersSamplingPolicySet))
+{
+    // Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ const ValidRegion valid_region =
+ calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false);
+
+ // Validate output
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+const auto f16_shape = combine((SCALE_PRECOMMIT_SHAPE_DATASET(num_elements_per_vector<half>())),
+ framework::dataset::make("DataType", DataType::F16));
+FIXTURE_DATA_TEST_CASE(Run,
+ DynamicFusionResizeFixture<half>,
+ framework::DatasetMode::ALL,
+ ASSEMBLE_DATASET_DYNAMIC_FUSION(f16_shape, ScaleSamplingPolicySet))
+{
+    // Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ const ValidRegion valid_region =
+ calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false);
+
+ // Validate output
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunAlignCorners,
+ DynamicFusionResizeFixture<half>,
+ framework::DatasetMode::ALL,
+ ASSEMBLE_DATASET_DYNAMIC_FUSION(f16_shape, ScaleAlignCornersSamplingPolicySet))
+{
+    // Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ const ValidRegion valid_region =
+ calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false);
+
+ // Validate output
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
+}
+const auto f16_nightly_shape = combine((SCALE_NIGHTLY_SHAPE_DATASET(num_elements_per_vector<half>())),
+ framework::dataset::make("DataType", DataType::F16));
+FIXTURE_DATA_TEST_CASE(RunNightly,
+ DynamicFusionResizeFixture<half>,
+ framework::DatasetMode::NIGHTLY,
+ ASSEMBLE_DATASET_DYNAMIC_FUSION(f16_nightly_shape, ScaleSamplingPolicySet))
+{
+    // Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ const ValidRegion valid_region =
+ calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false);
+
+ // Validate output
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners,
+ DynamicFusionResizeFixture<half>,
+ framework::DatasetMode::NIGHTLY,
+ ASSEMBLE_DATASET_DYNAMIC_FUSION(f16_nightly_shape, ScaleAlignCornersSamplingPolicySet))
+{
+    // Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ const ValidRegion valid_region =
+ calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false);
+
+ // Validate output
+ validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
+}
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // RESIZE
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Sigmoid.cpp b/tests/validation/dynamic_fusion/gpu/cl/Sigmoid.cpp
new file mode 100644
index 0000000000..0134a7c11b
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Sigmoid.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_f32(1e-6f);
+constexpr AbsoluteTolerance<float> tolerance_f16(0.001f);
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(SIGMOID)
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type
+ }),
+ framework::dataset::make("Expected", { true, true, false })),
+ input_info, expected)
+{
+ // Create a new workload sketch
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &context };
+
+ // Fuse sigmoid
+ const ITensorInfo *src_info = context.create_tensor_info(input_info);
+
+ const bool res = static_cast<bool>(GpuSigmoid::validate_op(sketch, src_info));
+ ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using DynamicFusionSigmoidOpFixture = DynamicFusionSigmoidValidationFixture<CLTensor, CLAccessor, GpuSigmoid, T>;
+
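+// The "Fuse" flag below presumably selects between a single sigmoid (false) and two sigmoid ops
+// chained in the same sketch (true), mirroring the OneOp/TwoOps naming of the test cases.
+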
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionSigmoidOpFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {false})),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp,
+ DynamicFusionSigmoidOpFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::Small5dShapes(), framework::dataset::make("Fuse", {false})),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ ARM_COMPUTE_TEST_INFO("Currently 5D+ tensors are unsupported for this operation.");
+ framework::ARM_COMPUTE_PRINT_INFO();
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps,
+ DynamicFusionSigmoidOpFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {true})),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionSigmoidOpFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {false})),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp,
+ DynamicFusionSigmoidOpFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::Small5dShapes(), framework::dataset::make("Fuse", {false})),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ ARM_COMPUTE_TEST_INFO("Currently 5D+ tensors are unsupported for this operation.");
+ framework::ARM_COMPUTE_PRINT_INFO();
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps,
+ DynamicFusionSigmoidOpFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {true})),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // SIGMOID
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp b/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp
new file mode 100644
index 0000000000..8f5a1ed14a
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Tolerance for float operations */
+RelativeTolerance<half> tolerance_f16(half(0.2));
+RelativeTolerance<float> tolerance_f32(0.001f);
+
+using framework::dataset::make;
+
+/// TODO: COMPMID-6713
+/// Softmax is not implemented in CKW. Therefore, the tests are DISABLED.
+/// Enable the tests when Softmax is implemented in CKW.
+
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(SOFTMAX)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::DISABLED,
+ zip(
+ make("InputInfo", {
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching data types
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching shapes
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::S32), // Unsupported data type
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F16),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ }),
+ make("OutputInfo",{
+ TensorInfo(TensorShape(27U, 13U), 1, DataType::F16),
+ TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM16), // Unsupported data type
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
+ }),
+ make("beta", {
+ 1.0,
+ 2.0,
+ 2.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ }),
+ make("axis", {
+ 0,
+ 0,
+ 1, // Invalid as axis != 0
+ 0,
+ 0,
+ 0,
+ -3, // Invalid as axis != 0
+ 2, // Invalid as axis != 0
+ 1, // Invalid as axis != 0
+ -1, // Invalid as axis != 0
+ }),
+ make("Expected", { false, false, false, true, false, false, false, false, false, false})),
+ input_info, output_info, beta, axis, expected)
+{
+ // Create a new workload sketch
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context = GpuWorkloadContext{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &context };
+
+ SoftmaxAttributes softmax_attr{};
+ softmax_attr.axis(axis).beta(beta).is_log_softmax(false);
+ ITensorInfo* src_info = context.create_tensor_info(input_info);
+ ITensorInfo* dst_info = context.create_tensor_info(output_info);
+ const bool res = static_cast<bool>(GpuSoftmax::validate_op(sketch, src_info, dst_info, softmax_attr));
+ ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS);
+}
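+
+// Note: beta scales the logits before normalisation, i.e. softmax(x)_i = exp(beta * x_i) / sum_j exp(beta * x_j),
+// and, as the axis cases above show, only axis 0 (the innermost dimension) is accepted by validate_op.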
+
+template <typename T>
+using DynamicFusionSoftmaxLayerFixture = DynamicFusionSoftmaxValidationFixture<CLTensor, CLAccessor, GpuSoftmax, T>;
+
+TEST_SUITE(FLOAT)
+TEST_SUITE(FP32)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionSoftmaxLayerFixture<float>, framework::DatasetMode::DISABLED,
+ combine(
+ datasets::SoftmaxLayerSmallShapes(),
+ make("DataType", DataType::F32),
+ make("Beta", { 1.0f, 2.0f }),
+ make("Axis", { 0 }),
+ make("is_log", {false, true})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+
+FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionSoftmaxLayerFixture<float>, framework::DatasetMode::DISABLED,
+ combine(
+ datasets::SoftmaxLayerLargeShapes(),
+ make("DataType", DataType::F32),
+ make("Beta", { 1.0f, 2.0f }),
+ make("Axis", { 0 }),
+ make("is_log", {false, true})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+
+FIXTURE_DATA_TEST_CASE(Run4D, DynamicFusionSoftmaxLayerFixture<float>, framework::DatasetMode::DISABLED,
+ combine(
+ datasets::SoftmaxLayer4DShapes(),
+ make("DataType", DataType::F32),
+ make("Beta", { 1.0f, 2.0f }),
+ make("Axis", { 0 }),
+ make("is_log", {false, true})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE(FP16)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionSoftmaxLayerFixture<half>, framework::DatasetMode::DISABLED,
+ combine(
+ datasets::SoftmaxLayerSmallShapes(),
+ make("DataType", DataType::F16),
+ make("Beta", { 1.0f, 2.0f }),
+ make("Axis", { 0 }),
+ make("is_log", {false, true})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+
+FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionSoftmaxLayerFixture<half>, framework::DatasetMode::DISABLED,
+ combine(
+ datasets::SoftmaxLayerLargeShapes(),
+ make("DataType", DataType::F16),
+ make("Beta", { 1.0f, 2.0f }),
+ make("Axis", { 0 }),
+ make("is_log", {false, true})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+
+FIXTURE_DATA_TEST_CASE(Run4D, DynamicFusionSoftmaxLayerFixture<half>, framework::DatasetMode::DISABLED,
+ combine(
+ datasets::SoftmaxLayer4DShapes(),
+ make("DataType", DataType::F16),
+ make("Beta", { 1.0f, 2.0f }),
+ make("Axis", { 0 }),
+ make("is_log", {false, true})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // FLOAT
+
+TEST_SUITE_END() // SOFTMAX
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp b/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp
new file mode 100644
index 0000000000..c7ab1e717c
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/DynamicFusionDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/* Synced with tests/validation/CL/ArithmeticSubtraction.cpp from the standard interface.
+ *
+ * Difference | Why the difference
+ * No quantized tests | Not supported yet
+ * No in place tests | Not supported yet
+ * No activation tests | Not needed in dynamic fusion interface
+ *
+ */
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(SUB)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+ framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U32), // Unsupported data type U32
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8_SIGNED
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // Invalid data type combination
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // Invalid data type combination
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
+ TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed
+ TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::S16), // Invalid data type combination
+ TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed
+ }),
+ framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32),
+ TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs
+ TensorInfo(TensorShape(15U, 1U, 3U), 1, DataType::F32),
+ TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::S16),
+ TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32),
+ })),
+ framework::dataset::make("Expected", { true, false, false, false, false, false, false, false, true, true, false, false, true })),
+ input1_info, input2_info, expected)
+{
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &context };
+
+ // Validate Elementwise Sub
+ auto lhs_info = context.create_tensor_info(input1_info);
+ auto rhs_info = context.create_tensor_info(input2_info);
+
+ bool res = bool(GpuSub::validate_op(sketch, lhs_info, rhs_info));
+ ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using DynamicFusionCLSubFixture =
+ DynamicFusionGpuElementwiseBinaryOneOpValidationFixture<CLTensor, CLAccessor, GpuSub, T>;
+
+template <typename T>
+using DynamicFusionCLSubBroadcastFixture =
+ DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture<CLTensor, CLAccessor, GpuSub, T>;
+
+template <typename T>
+using DynamicFusionCLSubTwoOpsFixture =
+ DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture<CLTensor, CLAccessor, GpuSub, T>;
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionCLSubFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeOneOp,
+ DynamicFusionCLSubFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}),
+ datasets::LargeShapes()),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp,
+ DynamicFusionCLSubBroadcastFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}),
+ datasets::TemporaryLimitedSmallShapesBroadcast()),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp,
+ DynamicFusionCLSubBroadcastFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}),
+ datasets::TemporaryLimitedLargeShapesBroadcast()),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(
+ RunSmallTwoOps,
+ DynamicFusionCLSubTwoOpsFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}),
+ datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes()),
+ framework::dataset::make("DataType", {DataType::F32})),
+ framework::dataset::make("InPlace", {false})),
+ framework::dataset::make("FuseTwoOps", {true})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionCLSubFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", {DataType::F16})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp,
+ DynamicFusionCLSubBroadcastFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}),
+ datasets::TemporaryLimitedSmallShapesBroadcast()),
+ framework::dataset::make("DataType", {DataType::F16})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionCLSubFixture<int32_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", {DataType::S32})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // S32
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionCLSubFixture<int16_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", {DataType::S16})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionCLSubFixture<int16_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}),
+ datasets::LargeShapes()),
+ framework::dataset::make("DataType", {DataType::S16})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // S16
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionCLSubFixture<uint8_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", {DataType::U8})),
+ framework::dataset::make("InPlace", {false})))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // U8
+
+TEST_SUITE_END() // SUB
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Tanh.cpp b/tests/validation/dynamic_fusion/gpu/cl/Tanh.cpp
new file mode 100644
index 0000000000..2560f3aab1
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Tanh.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuTanh.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f);
+constexpr AbsoluteTolerance<float> tolerance_f16(0.001f);
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(TANH)
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type
+ }),
+ framework::dataset::make("Expected", { true, true, false })),
+ input_info, expected)
+{
+ // Create a new workload sketch
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &context };
+
+ // Fuse tanh
+ const ITensorInfo* src_info = context.create_tensor_info(input_info);
+
+ const bool res = static_cast<bool>(GpuTanh::validate_op(sketch, src_info));
+ ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using DynamicFusionTanhOpFixture = DynamicFusionTanhValidationFixture<CLTensor, CLAccessor, GpuTanh, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionTanhOpFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {false})),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp,
+ DynamicFusionTanhOpFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::Small5dShapes(), framework::dataset::make("Fuse", {false})),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ ARM_COMPUTE_TEST_INFO("Currently 5D+ tensors are unsupported for this operation.");
+ framework::ARM_COMPUTE_PRINT_INFO();
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps,
+ DynamicFusionTanhOpFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {true})),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionTanhOpFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {false})),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp,
+ DynamicFusionTanhOpFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::Small5dShapes(), framework::dataset::make("Fuse", {false})),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ ARM_COMPUTE_TEST_INFO("Currently 5D+ tensors are unsupported for this operation.");
+ framework::ARM_COMPUTE_PRINT_INFO();
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps,
+ DynamicFusionTanhOpFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {true})),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // TANH
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/ActivationLayerFixture.h b/tests/validation/fixtures/ActivationLayerFixture.h
index 531b3abdf9..a24ba8913e 100644
--- a/tests/validation/fixtures/ActivationLayerFixture.h
+++ b/tests/validation/fixtures/ActivationLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_ACTIVATION_LAYER_FIXTURE
-#define ARM_COMPUTE_TEST_ACTIVATION_LAYER_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_ACTIVATIONLAYERFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_ACTIVATIONLAYERFIXTURE_H
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
@@ -47,12 +47,7 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ActivationValidationGenericFixture : public framework::Fixture
{
public:
- ActivationValidationGenericFixture()
- : _target(parameters->get_ctx<TensorType>())
- {
- }
- template <typename...>
void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type, QuantizationInfo quantization_info)
{
ActivationLayerInfo info(function, alpha_beta, alpha_beta);
@@ -120,13 +115,12 @@ protected:
TensorType compute_target(const TensorShape &shape, ActivationLayerInfo info)
{
- auto ctx = parameters->get_ctx<TensorType>();
// Create tensors
- TensorType src = create_tensor<TensorType>(shape, _data_type, 1, _input_quantization_info, DataLayout::NCHW, ctx);
- TensorType dst = create_tensor<TensorType>(shape, _data_type, 1, _output_quantization_info, DataLayout::NCHW, ctx);
+ TensorType src = create_tensor<TensorType>(shape, _data_type, 1, _input_quantization_info, DataLayout::NCHW);
+ TensorType dst = create_tensor<TensorType>(shape, _data_type, 1, _output_quantization_info, DataLayout::NCHW);
// Create and configure function
- FunctionType act_layer(ctx);
+ FunctionType act_layer;
TensorType *dst_ptr = _in_place ? nullptr : &dst;
@@ -234,7 +228,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ActivationValidationFixture : public ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type)
{
ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, in_place, function, alpha_beta, data_type, QuantizationInfo());
@@ -245,7 +238,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ActivationValidationQuantizedFixture : public ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type, QuantizationInfo quantization_info)
{
ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, in_place, function, alpha_beta, data_type, quantization_info);
@@ -255,4 +247,4 @@ public:
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_ACTIVATION_LAYER_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_ACTIVATIONLAYERFIXTURE_H
diff --git a/tests/validation/fixtures/AddMulAddFixture.h b/tests/validation/fixtures/AddMulAddFixture.h
new file mode 100644
index 0000000000..d13fef2f02
--- /dev/null
+++ b/tests/validation/fixtures/AddMulAddFixture.h
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_ADDMULADDFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_ADDMULADDFIXTURE_H
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/ArithmeticOperations.h"
+#include "tests/validation/reference/DequantizationLayer.h"
+#include "tests/validation/reference/PixelWiseMultiplication.h"
+#include "tests/validation/reference/QuantizationLayer.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class AddMulAddGenericFixture : public framework::Fixture
+{
+public:
+ void setup(const TensorShape &shape, DataType data_type, ActivationLayerInfo &act_info, bool interm_out)
+ {
+ compute_target(shape, data_type, act_info, interm_out);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i, DataType data_type)
+ {
+ switch(data_type)
+ {
+ case DataType::F32:
+ library->fill_tensor_uniform(tensor, i, -10.f, 10.f);
+ break;
+ case DataType::F16:
+ library->fill_tensor_uniform(tensor, i, -1.f, 1.f);
+ break;
+ default:
+ library->fill_tensor_uniform(tensor, i);
+ break;
+ }
+ }
+
+ void compute_target(const TensorShape &shape, DataType data_type, ActivationLayerInfo &act_info, bool interm_out)
+ {
+ TensorShape b_shape(shape.x());
+
+ // Create tensors
+ TensorType input1 = create_tensor<TensorType>(shape, data_type, 1, _input1_qinfo);
+ TensorType input2 = create_tensor<TensorType>(shape, data_type, 1, _input2_qinfo);
+ TensorType bn_mul = create_tensor<TensorType>(b_shape, data_type, 1, _bn_mul_qinfo);
+ TensorType bn_add = create_tensor<TensorType>(b_shape, data_type, 1, _bn_add_qinfo);
+ TensorType add_output = create_tensor<TensorType>(shape, data_type, 1, _add_output_qinfo);
+ TensorType final_output = create_tensor<TensorType>(shape, data_type, 1, _final_output_qinfo);
+
+ // Create and configure function
+ FunctionType add_mul_add;
+ ARM_COMPUTE_ERROR_THROW_ON(add_mul_add.validate(input1.info(), input2.info(), bn_mul.info(),
+ bn_add.info(), interm_out ? add_output.info() : nullptr, final_output.info(),
+ ConvertPolicy::SATURATE, act_info));
+
+ add_mul_add.configure(&input1, &input2, &bn_mul, &bn_add, interm_out ? &add_output : nullptr,
+ &final_output, ConvertPolicy::SATURATE, act_info);
+
+ // Allocate tensors
+ input1.allocator()->allocate();
+ input2.allocator()->allocate();
+ bn_mul.allocator()->allocate();
+ bn_add.allocator()->allocate();
+
+ if(interm_out)
+ {
+ add_output.allocator()->allocate();
+ }
+
+ final_output.allocator()->allocate();
+
+ // Fill tensors
+ fill(AccessorType(input1), 0, data_type);
+ fill(AccessorType(input2), 1, data_type);
+ fill(AccessorType(bn_mul), 2, data_type);
+ fill(AccessorType(bn_add), 3, data_type);
+
+ // Compute function
+ add_mul_add.run();
+
+ _target = std::move(final_output);
+
+ if(interm_out)
+ {
+ _interm_target = std::move(add_output);
+ }
+ }
+
+ TensorType _target{};
+ TensorType _interm_target{};
+ SimpleTensor<T> _reference{};
+ SimpleTensor<T> _interm_reference{};
+
+ QuantizationInfo _input1_qinfo{};
+ QuantizationInfo _input2_qinfo{};
+ QuantizationInfo _bn_mul_qinfo{};
+ QuantizationInfo _bn_add_qinfo{};
+ QuantizationInfo _add_output_qinfo{};
+ QuantizationInfo _final_output_qinfo{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool interm_out>
+class AddMulAddFloatValidationFixture : public AddMulAddGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ using Parent = AddMulAddGenericFixture<TensorType, AccessorType, FunctionType, T>;
+
+ void setup(const TensorShape &shape, DataType data_type, ActivationLayerInfo act_info)
+ {
+ Parent::setup(shape, data_type, act_info, interm_out);
+ compute_reference(shape, data_type, act_info);
+ }
+
+ // compute_reference() is kept out of the generic fixture because, for the quantized data types,
+ // it becomes a very different implementation in which the intermediate tensors are always float.
+ // Keeping it in the derived fixtures makes the reference calculations more readable and the
+ // classes smaller, since fill() and the target computation are not repeated.
+ void compute_reference(const TensorShape &shape, DataType data_type, ActivationLayerInfo &act_info)
+ {
+ TensorShape b_shape(shape.x());
+
+ // Create reference
+ SimpleTensor<T> input1{ shape, data_type };
+ SimpleTensor<T> input2{ shape, data_type };
+ SimpleTensor<T> bn_mul{ b_shape, data_type };
+ SimpleTensor<T> bn_add{ b_shape, data_type };
+ SimpleTensor<T> add_output{ shape, data_type, 1 };
+
+ SimpleTensor<T> bn_mul_out{ shape, data_type };
+ SimpleTensor<T> bn_add_out{ shape, data_type };
+
+ // Fill reference
+ Parent::fill(input1, 0, data_type);
+ Parent::fill(input2, 1, data_type);
+ Parent::fill(bn_mul, 2, data_type);
+ Parent::fill(bn_add, 3, data_type);
+
+ reference::arithmetic_operation<T>(reference::ArithmeticOperation::ADD, input1, input2, add_output, ConvertPolicy::SATURATE);
+ bn_mul_out = reference::pixel_wise_multiplication<T, T, T>(add_output, bn_mul, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, data_type);
+ reference::arithmetic_operation<T>(reference::ArithmeticOperation::ADD, bn_mul_out, bn_add, bn_add_out, ConvertPolicy::SATURATE);
+
+ if(interm_out)
+ {
+ Parent::_interm_reference = std::move(add_output);
+ }
+
+ if(act_info.enabled() && act_info.activation() != ActivationLayerInfo::ActivationFunction::IDENTITY)
+ {
+ Parent::_reference = reference::activation_layer(bn_add_out, act_info);
+ }
+ else
+ {
+ Parent::_reference = std::move(bn_add_out);
+ }
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool interm_out>
+class AddMulAddQuantizedValidationFixture : public AddMulAddGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ using Parent = AddMulAddGenericFixture<TensorType, AccessorType, FunctionType, T>;
+
+ void setup(const TensorShape &shape, DataType data_type, ActivationLayerInfo act_info,
+ QuantizationInfo input1_qinfo, QuantizationInfo input2_qinfo, QuantizationInfo bn_mul_qinfo,
+ QuantizationInfo bn_add_qinfo, QuantizationInfo add_output_qinfo, QuantizationInfo final_output_qinfo)
+ {
+ // Quantization arguments are moved to class attributes to avoid overly long function declarations
+ Parent::_input1_qinfo = input1_qinfo;
+ Parent::_input2_qinfo = input2_qinfo;
+ Parent::_bn_mul_qinfo = bn_mul_qinfo;
+ Parent::_bn_add_qinfo = bn_add_qinfo;
+ Parent::_add_output_qinfo = add_output_qinfo;
+ Parent::_final_output_qinfo = final_output_qinfo;
+
+ Parent::setup(shape, data_type, act_info, interm_out);
+ compute_reference(shape, data_type, act_info);
+ }
+
+ // compute_reference() is kept out of the generic fixture because, for the quantized data types,
+ // it becomes a very different implementation in which the intermediate tensors are always float.
+ // Keeping it in the derived fixtures makes the reference calculations more readable and the
+ // classes smaller, since fill() and the target computation are not repeated.
+ void compute_reference(const TensorShape &shape, DataType data_type, ActivationLayerInfo &act_info)
+ {
+ TensorShape b_shape(shape.x());
+
+ // Create reference
+ SimpleTensor<T> input1{ shape, data_type, 1, Parent::_input1_qinfo };
+ SimpleTensor<T> input2{ shape, data_type, 1, Parent::_input2_qinfo };
+ SimpleTensor<T> bn_mul{ b_shape, data_type, 1, Parent::_bn_mul_qinfo };
+ SimpleTensor<T> bn_add{ b_shape, data_type, 1, Parent::_bn_add_qinfo };
+
+ // Fill input tensors
+ Parent::fill(input1, 0, data_type);
+ Parent::fill(input2, 1, data_type);
+ Parent::fill(bn_mul, 2, data_type);
+ Parent::fill(bn_add, 3, data_type);
+
+ SimpleTensor<float> input1_dequantized = reference::dequantization_layer<float>(input1);
+ SimpleTensor<float> input2_dequantized = reference::dequantization_layer<float>(input2);
+ SimpleTensor<float> bn_mul_dequantized = reference::dequantization_layer<float>(bn_mul);
+ SimpleTensor<float> bn_add_dequantized = reference::dequantization_layer<float>(bn_add);
+
+ SimpleTensor<float> add_output_dequantized{ shape, DataType::F32 };
+ SimpleTensor<float> bn_add_out_dequantized{ shape, DataType::F32 };
+
+ reference::arithmetic_operation<float>(reference::ArithmeticOperation::ADD, input1_dequantized, input2_dequantized, add_output_dequantized, ConvertPolicy::SATURATE);
+ SimpleTensor<float> bn_mul_out_dequantized = reference::pixel_wise_multiplication<float, float, float>(add_output_dequantized, bn_mul_dequantized, 1.f, ConvertPolicy::SATURATE,
+ RoundingPolicy::TO_NEAREST_UP, DataType::F32);
+ reference::arithmetic_operation<float>(reference::ArithmeticOperation::ADD, bn_mul_out_dequantized, bn_add_dequantized, bn_add_out_dequantized, ConvertPolicy::SATURATE);
+
+ if(interm_out)
+ {
+ Parent::_interm_reference = reference::quantization_layer<float, T>(add_output_dequantized, data_type, Parent::_add_output_qinfo);
+ }
+
+ if(act_info.enabled() && act_info.activation() != ActivationLayerInfo::ActivationFunction::IDENTITY)
+ {
+ SimpleTensor<T> ref = reference::quantization_layer<float, T>(bn_add_out_dequantized, data_type, Parent::_final_output_qinfo);
+ Parent::_reference = reference::activation_layer(ref, act_info);
+ }
+ else
+ {
+ Parent::_reference = reference::quantization_layer<float, T>(bn_add_out_dequantized, data_type, Parent::_final_output_qinfo);
+ }
+ }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif // ACL_TESTS_VALIDATION_FIXTURES_ADDMULADDFIXTURE_H
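For context, the float fixture above drives the operator through add -> pixel-wise multiply -> add and compares against the same chain of float reference functions; a suite would typically instantiate it per backend. Below is a minimal usage sketch under stated assumptions: the NEON test types (Tensor, Accessor), NEAddMulAdd as the FunctionType, the header path, and the tolerance value are assumptions of the example, not definitions from this patch.

#include "arm_compute/runtime/NEON/functions/NEAddMulAdd.h" // assumed operator header
#include "tests/NEON/Accessor.h"
#include "tests/validation/fixtures/AddMulAddFixture.h"

// Sketch only: bind the float fixture to the NEON backend, without requesting the intermediate output.
template <typename T>
using NEAddMulAddFloatFixture = AddMulAddFloatValidationFixture<Tensor, Accessor, NEAddMulAdd, T, /* interm_out = */ false>;

FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulAddFloatFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(datasets::SmallShapes(),
                                       framework::dataset::make("DataType", DataType::F32)),
                               framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })))
{
    // Compare the fused target against the add -> mul -> add float reference (tolerance is illustrative)
    validate(Accessor(_target), _reference, AbsoluteTolerance<float>(0.001f));
}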
diff --git a/tests/validation/fixtures/ArgMinMaxFixture.h b/tests/validation/fixtures/ArgMinMaxFixture.h
index caa6bb8d9c..7a823568a8 100644
--- a/tests/validation/fixtures/ArgMinMaxFixture.h
+++ b/tests/validation/fixtures/ArgMinMaxFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,15 +42,14 @@ namespace test
{
namespace validation
{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
class ArgMinMaxValidationBaseFixture : public framework::Fixture
{
public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type, int axis, ReductionOperation op, QuantizationInfo q_info)
+ void setup(TensorShape shape, DataType input_type, DataType output_type, int axis, ReductionOperation op, QuantizationInfo q_info)
{
- _target = compute_target(shape, data_type, axis, op, q_info);
- _reference = compute_reference(shape, data_type, axis, op, q_info);
+ _target = compute_target(shape, input_type, output_type, axis, op, q_info);
+ _reference = compute_reference(shape, input_type, output_type, axis, op, q_info);
}
protected:
@@ -80,7 +79,7 @@ protected:
case DataType::QASYMM8:
{
std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f);
- std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second);
+ std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second);
library->fill(tensor, distribution, 0);
break;
@@ -88,7 +87,7 @@ protected:
case DataType::QASYMM8_SIGNED:
{
std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f);
- std::uniform_int_distribution<int8_t> distribution(bounds.first, bounds.second);
+ std::uniform_int_distribution<int32_t> distribution(bounds.first, bounds.second);
library->fill(tensor, distribution, 0);
break;
@@ -98,11 +97,11 @@ protected:
}
}
- TensorType compute_target(TensorShape &src_shape, DataType data_type, int axis, ReductionOperation op, QuantizationInfo q_info)
+ TensorType compute_target(TensorShape &src_shape, DataType input_type, DataType output_type, int axis, ReductionOperation op, QuantizationInfo q_info)
{
// Create tensors
- TensorType src = create_tensor<TensorType>(src_shape, data_type, 1, q_info);
- TensorType dst;
+ TensorType src = create_tensor<TensorType>(src_shape, input_type, 1, q_info);
+ TensorType dst = create_tensor<TensorType>(compute_output_shape(src_shape, axis), output_type, 1, q_info);
// Create and configure function
FunctionType arg_min_max_layer;
@@ -127,41 +126,43 @@ protected:
return dst;
}
- SimpleTensor<int32_t> compute_reference(TensorShape &src_shape, DataType data_type, int axis, ReductionOperation op, QuantizationInfo q_info)
+ TensorShape compute_output_shape(const TensorShape &src_shape, int axis)
+ {
+ return arm_compute::misc::shape_calculator::compute_reduced_shape(src_shape, axis, false);
+ }
+
+ SimpleTensor<T2> compute_reference(TensorShape &src_shape, DataType input_type, DataType output_type, int axis, ReductionOperation op, QuantizationInfo q_info)
{
// Create reference
- SimpleTensor<T> src{ src_shape, data_type, 1, q_info };
+ SimpleTensor<T1> src{ src_shape, input_type, 1, q_info };
// Fill reference
fill(src);
- TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(src_shape, axis, false);
- return reference::reduction_operation<T, int32_t>(src, output_shape, axis, op);
+ return reference::reduction_operation<T1, T2>(src, compute_output_shape(src_shape, axis), axis, op, output_type);
}
- TensorType _target{};
- SimpleTensor<int32_t> _reference{};
+ TensorType _target{};
+ SimpleTensor<T2> _reference{};
};
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ArgMinMaxValidationQuantizedFixture : public ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class ArgMinMaxValidationQuantizedFixture : public ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type, int axis, ReductionOperation op, QuantizationInfo quantization_info)
+ void setup(const TensorShape &shape, DataType input_type, DataType output_type, int axis, ReductionOperation op, QuantizationInfo quantization_info)
{
- ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, op, quantization_info);
+ ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, input_type, output_type, axis, op, quantization_info);
}
};
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ArgMinMaxValidationFixture : public ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class ArgMinMaxValidationFixture : public ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type, int axis, ReductionOperation op)
+ void setup(const TensorShape &shape, DataType input_type, DataType output_type, int axis, ReductionOperation op)
{
- ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, op, QuantizationInfo());
+ ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, input_type, output_type, axis, op, QuantizationInfo());
}
};
} // namespace validation
diff --git a/tests/validation/fixtures/ArithmeticDivisionFixture.h b/tests/validation/fixtures/ArithmeticDivisionFixture.h
index 2c2c01c31c..e11a386130 100644
--- a/tests/validation/fixtures/ArithmeticDivisionFixture.h
+++ b/tests/validation/fixtures/ArithmeticDivisionFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticDivisionBroadcastValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type)
{
_target = compute_target(shape0, shape1, data_type);
@@ -117,7 +116,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticDivisionValidationFixture : public ArithmeticDivisionBroadcastValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type)
{
ArithmeticDivisionBroadcastValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, shape, data_type);
diff --git a/tests/validation/fixtures/ArithmeticOperationsFixture.h b/tests/validation/fixtures/ArithmeticOperationsFixture.h
index 1dfc2ce579..0785af1151 100644
--- a/tests/validation/fixtures/ArithmeticOperationsFixture.h
+++ b/tests/validation/fixtures/ArithmeticOperationsFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,16 +45,14 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticOperationGenericFixture : public framework::Fixture
{
public:
- template <typename...>
- void setup(reference::ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1,
- DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy,
- QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool in_place)
+ void setup(reference::ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy,
+ QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool is_inplace)
{
- _op = op;
- _act_info = act_info;
- _in_place = in_place;
- _target = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, convert_policy, qinfo0, qinfo1, qinfo_out);
- _reference = compute_reference(shape0, shape1, data_type0, data_type1, output_data_type, convert_policy, qinfo0, qinfo1, qinfo_out);
+ _op = op;
+ _act_info = act_info;
+ _is_inplace = is_inplace;
+ _target = compute_target(shape0, shape1, data_type, convert_policy, qinfo0, qinfo1, qinfo_out);
+ _reference = compute_reference(shape0, shape1, data_type, convert_policy, qinfo0, qinfo1, qinfo_out);
}
protected:
@@ -64,31 +62,55 @@ protected:
library->fill_tensor_uniform(tensor, i);
}
- TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy,
+ TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy,
QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
{
// Create tensors
- TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0);
- TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1);
- TensorType dst = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, qinfo_out);
- TensorType *dst_to_use = _in_place ? &ref_src1 : &dst;
+ const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+ TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type, 1, qinfo0);
+ TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type, 1, qinfo1);
+ TensorType dst = create_tensor<TensorType>(out_shape, data_type, 1, qinfo_out);
+
+ // Check whether to do in-place computation and whether the inputs are broadcast-compatible
+ TensorType *actual_dst = &dst;
+ if(_is_inplace)
+ {
+ bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out);
+ bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out);
+ bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace);
+ ARM_COMPUTE_ASSERT(do_in_place);
+
+ if(src1_is_inplace)
+ {
+ actual_dst = &ref_src1;
+ }
+ else
+ {
+ actual_dst = &ref_src2;
+ }
+ }
// Create and configure function
FunctionType arith_op;
- arith_op.configure(&ref_src1, &ref_src2, dst_to_use, convert_policy, _act_info);
+ arith_op.configure(&ref_src1, &ref_src2, actual_dst, convert_policy, _act_info);
ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable());
ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable());
- ARM_COMPUTE_ASSERT(dst_to_use->info()->is_resizable());
// Allocate tensors
ref_src1.allocator()->allocate();
ref_src2.allocator()->allocate();
- dst_to_use->allocator()->allocate();
ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable());
ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable());
- ARM_COMPUTE_ASSERT(!dst_to_use->info()->is_resizable());
+
+ // If not doing in-place computation, the original dst still needs to be allocated
+ if(!_is_inplace)
+ {
+ ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+ dst.allocator()->allocate();
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+ }
// Fill tensors
fill(AccessorType(ref_src1), 0);
@@ -97,50 +119,40 @@ protected:
// Compute function
arith_op.run();
- if(_in_place)
- {
- return ref_src1;
- }
- return dst;
+ return std::move(*actual_dst);
}
- SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1,
- DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy,
+ SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy,
QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
{
- // current in-place implementation only supports same metadata of input and output tensors.
- // By ignoring output quantization information here, we can make test cases implementation much simpler.
- QuantizationInfo output_qinfo = _in_place ? qinfo0 : qinfo_out;
-
// Create reference
- SimpleTensor<T> ref_src1{ shape0, data_type0, 1, qinfo0 };
- SimpleTensor<T> ref_src2{ shape1, data_type1, 1, qinfo1 };
- SimpleTensor<T> ref_dst{ TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, output_qinfo };
+ SimpleTensor<T> ref_src1{ shape0, data_type, 1, qinfo0 };
+ SimpleTensor<T> ref_src2{ shape1, data_type, 1, qinfo1 };
+ SimpleTensor<T> ref_dst{ TensorShape::broadcast_shape(shape0, shape1), data_type, 1, qinfo_out };
// Fill reference
fill(ref_src1, 0);
fill(ref_src2, 1);
auto result = reference::arithmetic_operation<T>(_op, ref_src1, ref_src2, ref_dst, convert_policy);
- return _act_info.enabled() ? reference::activation_layer(result, _act_info, output_qinfo) : result;
+ return _act_info.enabled() ? reference::activation_layer(result, _act_info, qinfo_out) : result;
}
TensorType _target{};
SimpleTensor<T> _reference{};
reference::ArithmeticOperation _op{ reference::ArithmeticOperation::ADD };
ActivationLayerInfo _act_info{};
- bool _in_place{};
+ bool _is_inplace{};
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class ArithmeticAdditionBroadcastValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, bool is_inplace)
{
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type0, data_type1,
- output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), false);
+ ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type, convert_policy,
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace);
}
};
@@ -148,11 +160,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticAdditionValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy)
+ void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, bool is_inplace)
{
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type0, data_type1,
- output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), false);
+ ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type, convert_policy,
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace);
}
};
@@ -160,11 +171,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticAdditionBroadcastValidationFloatFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, bool is_inplace)
{
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type0, data_type1,
- output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, false);
+ ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type, convert_policy,
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -172,11 +182,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticAdditionValidationFloatFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, bool is_inplace)
{
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type0, data_type1,
- output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, false);
+ ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type, convert_policy,
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -184,13 +193,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticAdditionValidationQuantizedFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy,
- QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+ void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
{
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type0, data_type1,
- output_data_type, convert_policy, qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), false);
+ ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type, convert_policy,
+ qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), is_inplace);
}
};
@@ -198,13 +205,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticAdditionValidationQuantizedBroadcastFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type,
- ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out,
+ bool is_inplace)
{
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1,
- data_type0, data_type1, output_data_type, convert_policy,
- qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), false);
+ ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type, convert_policy,
+ qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), is_inplace);
}
};
@@ -212,12 +217,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticSubtractionBroadcastValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, bool in_place)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, bool is_inplace)
{
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1,
- data_type0, data_type1, output_data_type, convert_policy,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), in_place);
+ ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1, data_type, convert_policy,
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace);
}
};
@@ -225,13 +228,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticSubtractionBroadcastValidationFloatFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info,
- bool in_place)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info,
+ bool is_inplace)
{
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1,
- data_type0, data_type1, output_data_type, convert_policy,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, in_place);
+ ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1, data_type, convert_policy,
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -239,12 +240,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticSubtractionValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, bool in_place)
+ void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, bool is_inplace)
{
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape,
- data_type0, data_type1, output_data_type, convert_policy,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), in_place);
+ ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, data_type, convert_policy,
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace);
}
};
@@ -252,12 +251,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticSubtractionValidationFloatFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, bool in_place)
+ void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, bool is_inplace)
{
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape,
- data_type0, data_type1, output_data_type, convert_policy,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, in_place);
+ ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, data_type, convert_policy,
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -265,14 +262,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticSubtractionValidationQuantizedFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy,
- QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool in_place)
+ void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
{
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape,
- data_type0, data_type1, output_data_type,
- convert_policy, qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), in_place);
+ ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, data_type, convert_policy,
+ qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), is_inplace);
}
};
@@ -280,13 +274,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticSubtractionValidationQuantizedBroadcastFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type,
- ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool in_place)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out,
+ bool is_inplace)
{
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1,
- data_type0, data_type1, output_data_type, convert_policy,
- qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), in_place);
+ ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1, data_type, convert_policy,
+ qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), is_inplace);
}
};
} // namespace validation
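The in-place path in compute_target() above reuses one of the inputs as the destination only when that input already matches the broadcast output. Read in isolation, the eligibility rule is the predicate below; this is an illustrative sketch with a hypothetical helper name, while have_different_dimensions() is the same utility the fixture itself calls.

#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Validate.h" // assumed location of arm_compute::detail::have_different_dimensions

// Sketch only: a source tensor may be written in place iff its shape already equals the
// broadcast output shape and its quantization info equals the output quantization info.
inline bool can_use_as_inplace_output(const arm_compute::TensorShape      &in_shape,
                                      const arm_compute::TensorShape      &out_shape,
                                      const arm_compute::QuantizationInfo &in_qinfo,
                                      const arm_compute::QuantizationInfo &out_qinfo)
{
    return !arm_compute::detail::have_different_dimensions(out_shape, in_shape, 0) && (in_qinfo == out_qinfo);
}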
diff --git a/tests/validation/fixtures/BatchNormalizationLayerFixture.h b/tests/validation/fixtures/BatchNormalizationLayerFixture.h
index 38b4d4375a..54a0ed9e09 100644
--- a/tests/validation/fixtures/BatchNormalizationLayerFixture.h
+++ b/tests/validation/fixtures/BatchNormalizationLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class BatchNormalizationLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape0, TensorShape shape1, float epsilon, bool use_beta, bool use_gamma, ActivationLayerInfo act_info, DataType dt, DataLayout data_layout)
{
_data_type = dt;
diff --git a/tests/validation/fixtures/BatchNormalizationLayerFusionFixture.h b/tests/validation/fixtures/BatchNormalizationLayerFusionFixture.h
index 1ffb8f38f3..161eeb0ef4 100644
--- a/tests/validation/fixtures/BatchNormalizationLayerFusionFixture.h
+++ b/tests/validation/fixtures/BatchNormalizationLayerFusionFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename ConvolutionFuncti
class BatchNormalizationLayerFusionValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape src_shape, TensorShape w_shape, TensorShape b_shape, TensorShape dst_shape, PadStrideInfo info, Size2D dilation,
bool use_conv_b, bool use_beta, bool use_gamma, float epsilon, DataType dt, DataLayout data_layout)
{
diff --git a/tests/validation/fixtures/BatchToSpaceLayerFixture.h b/tests/validation/fixtures/BatchToSpaceLayerFixture.h
index 6554c09de4..56a6109dbc 100644
--- a/tests/validation/fixtures/BatchToSpaceLayerFixture.h
+++ b/tests/validation/fixtures/BatchToSpaceLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_FIXTURE
#define ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_FIXTURE
+#include "arm_compute/core/Helpers.h"
#include "tests/Globals.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Fixture.h"
@@ -39,11 +40,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class BatchToSpaceLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
- void setup(TensorShape input_shape, TensorShape block_shape_shape, TensorShape output_shape, DataType data_type, DataLayout data_layout)
+ void setup(const TensorShape &input_shape, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &output_shape, DataType data_type, DataLayout data_layout)
{
- _target = compute_target(input_shape, block_shape_shape, output_shape, data_type, data_layout);
- _reference = compute_reference(input_shape, block_shape_shape, output_shape, data_type);
+ _target = compute_target(input_shape, block_shape, crop_info, output_shape, data_type, data_layout);
+ _reference = compute_reference(input_shape, block_shape, crop_info, output_shape, data_type);
}
protected:
@@ -56,9 +56,10 @@ protected:
DistributionType distribution{ T(-1.0f), T(1.0f) };
library->fill(tensor, distribution, i);
}
- TensorType compute_target(TensorShape input_shape, TensorShape block_shape_shape, TensorShape output_shape,
+ TensorType compute_target(TensorShape input_shape, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, TensorShape output_shape,
DataType data_type, DataLayout data_layout)
{
+ ARM_COMPUTE_ERROR_ON(block_shape.size() != 2U); // Only support batch to 2D space (x, y) for now
if(data_layout == DataLayout::NHWC)
{
permute(input_shape, PermutationVector(2U, 0U, 1U));
@@ -66,64 +67,49 @@ protected:
}
// Create tensors
- TensorType input = create_tensor<TensorType>(input_shape, data_type, 1, QuantizationInfo(), data_layout);
- TensorType block_shape = create_tensor<TensorType>(block_shape_shape, DataType::S32);
- TensorType output = create_tensor<TensorType>(output_shape, data_type, 1, QuantizationInfo(), data_layout);
+ TensorType input = create_tensor<TensorType>(input_shape, data_type, 1, QuantizationInfo(), data_layout);
+ TensorType output = create_tensor<TensorType>(output_shape, data_type, 1, QuantizationInfo(), data_layout);
// Create and configure function
FunctionType batch_to_space;
- batch_to_space.configure(&input, &block_shape, &output);
+ batch_to_space.configure(&input, block_shape.at(0), block_shape.at(1), &output, crop_info);
ARM_COMPUTE_ASSERT(input.info()->is_resizable());
- ARM_COMPUTE_ASSERT(block_shape.info()->is_resizable());
ARM_COMPUTE_ASSERT(output.info()->is_resizable());
// Allocate tensors
input.allocator()->allocate();
- block_shape.allocator()->allocate();
output.allocator()->allocate();
ARM_COMPUTE_ASSERT(!input.info()->is_resizable());
- ARM_COMPUTE_ASSERT(!block_shape.info()->is_resizable());
ARM_COMPUTE_ASSERT(!output.info()->is_resizable());
// Fill tensors
fill(AccessorType(input), 0);
- {
- auto block_shape_data = AccessorType(block_shape);
- const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- for(unsigned int i = 0; i < block_shape_shape.x(); ++i)
- {
- static_cast<int32_t *>(block_shape_data.data())[i] = output_shape[i + idx_width] / input_shape[i + idx_width];
- }
- }
// Compute function
batch_to_space.run();
return output;
}
- SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &block_shape_shape,
- const TensorShape &output_shape, DataType data_type)
+ SimpleTensor<T> compute_reference(const TensorShape &input_shape, const std::vector<int32_t> &block_shape,
+ const CropInfo &crop_info, const TensorShape &output_shape, DataType data_type)
{
+ ARM_COMPUTE_ERROR_ON(block_shape.size() != 2U); // Only support batch to 2D space (x, y) for now
// Create reference
- SimpleTensor<T> input{ input_shape, data_type };
- SimpleTensor<int32_t> block_shape{ block_shape_shape, DataType::S32 };
+ SimpleTensor<T> input{ input_shape, data_type };
// Fill reference
fill(input, 0);
- for(unsigned int i = 0; i < block_shape_shape.x(); ++i)
- {
- block_shape[i] = output_shape[i] / input_shape[i];
- }
// Compute reference
- return reference::batch_to_space(input, block_shape, output_shape);
+ return reference::batch_to_space(input, block_shape, crop_info, output_shape);
}
TensorType _target{};
SimpleTensor<T> _reference{};
};
+
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/fixtures/BitwiseAndFixture.h b/tests/validation/fixtures/BitwiseAndFixture.h
index 0550532e4b..745a34058e 100644
--- a/tests/validation/fixtures/BitwiseAndFixture.h
+++ b/tests/validation/fixtures/BitwiseAndFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class BitwiseAndValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type)
{
_target = compute_target(shape, data_type);
diff --git a/tests/validation/fixtures/BitwiseNotFixture.h b/tests/validation/fixtures/BitwiseNotFixture.h
index 9cf1938381..bdfd255156 100644
--- a/tests/validation/fixtures/BitwiseNotFixture.h
+++ b/tests/validation/fixtures/BitwiseNotFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class BitwiseNotValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type)
{
_target = compute_target(shape, data_type);
diff --git a/tests/validation/fixtures/BitwiseOrFixture.h b/tests/validation/fixtures/BitwiseOrFixture.h
index c4fc7d7c22..03560e0171 100644
--- a/tests/validation/fixtures/BitwiseOrFixture.h
+++ b/tests/validation/fixtures/BitwiseOrFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class BitwiseOrValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type)
{
_target = compute_target(shape, data_type);
diff --git a/tests/validation/fixtures/BitwiseXorFixture.h b/tests/validation/fixtures/BitwiseXorFixture.h
index 479694c78e..4872b231a5 100644
--- a/tests/validation/fixtures/BitwiseXorFixture.h
+++ b/tests/validation/fixtures/BitwiseXorFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class BitwiseXorValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type)
{
_target = compute_target(shape, data_type);
diff --git a/tests/validation/fixtures/BoundingBoxTransformFixture.h b/tests/validation/fixtures/BoundingBoxTransformFixture.h
index cd6ce99796..03edaeab16 100644
--- a/tests/validation/fixtures/BoundingBoxTransformFixture.h
+++ b/tests/validation/fixtures/BoundingBoxTransformFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -102,7 +102,6 @@ class BoundingBoxTransformGenericFixture : public framework::Fixture
public:
using TDeltas = typename std::conditional<std::is_same<typename std::decay<T>::type, uint16_t>::value, uint8_t, T>::type;
- template <typename...>
void setup(TensorShape deltas_shape, const BoundingBoxTransformInfo &info, DataType data_type, QuantizationInfo deltas_qinfo)
{
const bool is_qasymm16 = data_type == DataType::QASYMM16;
@@ -215,7 +214,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class BoundingBoxTransformFixture : public BoundingBoxTransformGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape deltas_shape, const BoundingBoxTransformInfo &info, DataType data_type)
{
BoundingBoxTransformGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(deltas_shape, info, data_type, QuantizationInfo());
@@ -228,7 +226,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class BoundingBoxTransformQuantizedFixture : public BoundingBoxTransformGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape deltas_shape, const BoundingBoxTransformInfo &info, DataType data_type, QuantizationInfo deltas_qinfo)
{
BoundingBoxTransformGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(deltas_shape, info, data_type, deltas_qinfo);
diff --git a/tests/validation/fixtures/CastFixture.h b/tests/validation/fixtures/CastFixture.h
index 056f664261..e9d624e6f3 100644
--- a/tests/validation/fixtures/CastFixture.h
+++ b/tests/validation/fixtures/CastFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,7 +36,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class CastValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy)
{
_target = compute_target(shape, dt_in, dt_out, policy);
@@ -86,6 +85,16 @@ protected:
library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(signed_min), static_cast<int32_t>(signed_max));
break;
}
+ case DataType::U64:
+ {
+ library->fill_tensor_uniform(tensor, i, static_cast<uint64_t>(unsigned_min), static_cast<uint64_t>(unsigned_max));
+ break;
+ }
+ case DataType::S64:
+ {
+ library->fill_tensor_uniform(tensor, i, static_cast<int64_t>(signed_min), static_cast<int64_t>(signed_max));
+ break;
+ }
default:
ARM_COMPUTE_ERROR("NOT SUPPORTED!");
}
diff --git a/tests/validation/fixtures/ChannelShuffleLayerFixture.h b/tests/validation/fixtures/ChannelShuffleLayerFixture.h
index 9199d9296d..530dba3893 100644
--- a/tests/validation/fixtures/ChannelShuffleLayerFixture.h
+++ b/tests/validation/fixtures/ChannelShuffleLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ChannelShuffleLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, unsigned int num_groups, DataType data_type, DataLayout data_layout)
{
_target = compute_target(shape, data_type, num_groups, data_layout);
diff --git a/tests/validation/fixtures/Col2ImFixture.h b/tests/validation/fixtures/Col2ImFixture.h
index 88d420aa3d..4d56d607b7 100644
--- a/tests/validation/fixtures/Col2ImFixture.h
+++ b/tests/validation/fixtures/Col2ImFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,10 +45,9 @@ namespace validation
using namespace arm_compute::misc::shape_calculator;
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool batch_size_on_z>
-class Col2ImValidationFixture : public framework::Fixture
+class Col2ImOpValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, const unsigned int convolved_width, unsigned int convolved_height, unsigned int num_groups, DataType data_type)
{
const Size2D convolved_dims(convolved_width, convolved_height);
@@ -74,7 +73,7 @@ protected:
// Create and configure function
FunctionType col2im_func;
- col2im_func.configure(&src, &dst, convolved_dims, num_groups);
+ col2im_func.configure(src.info(), dst.info(), convolved_dims, num_groups);
ARM_COMPUTE_ASSERT(src.info()->is_resizable());
ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
@@ -89,8 +88,13 @@ protected:
// Fill tensors
fill(AccessorType(src), 0);
+ arm_compute::ITensorPack pack =
+ {
+ { arm_compute::TensorType::ACL_SRC, &src },
+ { arm_compute::TensorType::ACL_DST, &dst }
+ };
// Compute function
- col2im_func.run();
+ col2im_func.run(pack);
return dst;
}
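
The Col2Im fixture above moves from the function-style interface (configure on tensors, run() with no arguments) to the operator-style one: the operator is configured on ITensorInfo objects and the actual tensors are bound through an ITensorPack at run time. A minimal sketch of that pattern, reusing the names from the fixture:

    // Operator-style run: configuration sees only tensor metadata, run() receives the tensors.
    FunctionType col2im_func;
    col2im_func.configure(src.info(), dst.info(), convolved_dims, num_groups);

    ITensorPack pack =
    {
        { TensorType::ACL_SRC, &src }, // tensors are bound here, at run time...
        { TensorType::ACL_DST, &dst }
    };
    col2im_func.run(pack);             // ...so the configured operator carries no tensor state
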
diff --git a/tests/validation/fixtures/ComparisonFixture.h b/tests/validation/fixtures/ComparisonFixture.h
index 6b38a376a4..f25d5abb73 100644
--- a/tests/validation/fixtures/ComparisonFixture.h
+++ b/tests/validation/fixtures/ComparisonFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ComparisonValidationGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(ComparisonOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, QuantizationInfo qinfo0, QuantizationInfo qinfo1)
{
_target = compute_target(op, shape0, shape1, data_type, qinfo0, qinfo1);
@@ -117,7 +116,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ComparisonBroadcastValidationFixture : public ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(ComparisonOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type)
{
ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1, data_type, QuantizationInfo(), QuantizationInfo());
@@ -128,7 +126,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ComparisonValidationFixture : public ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(ComparisonOperation op, const TensorShape &shape, DataType data_type)
{
ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape, shape, data_type, QuantizationInfo(), QuantizationInfo());
@@ -139,7 +136,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ComparisonValidationQuantizedFixture : public ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(ComparisonOperation op, const TensorShape &shape, DataType data_type, QuantizationInfo qinfo0, QuantizationInfo qinfo1)
{
@@ -151,7 +147,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ComparisonQuantizedBroadcastValidationFixture : public ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(ComparisonOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, QuantizationInfo qinfo0, QuantizationInfo qinfo1)
{
ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1, data_type, qinfo0, qinfo1);
diff --git a/tests/validation/fixtures/ComputeAllAnchorsFixture.h b/tests/validation/fixtures/ComputeAllAnchorsFixture.h
index e0fe35ba6f..620f1b53fa 100644
--- a/tests/validation/fixtures/ComputeAllAnchorsFixture.h
+++ b/tests/validation/fixtures/ComputeAllAnchorsFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ComputeAllAnchorsGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(size_t num_anchors, const ComputeAnchorsInfo &info, DataType data_type, QuantizationInfo qinfo)
{
_target = compute_target(num_anchors, data_type, info, qinfo);
@@ -107,7 +106,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ComputeAllAnchorsFixture : public ComputeAllAnchorsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(size_t num_anchors, const ComputeAnchorsInfo &info, DataType data_type)
{
ComputeAllAnchorsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(num_anchors, info, data_type, QuantizationInfo());
@@ -118,7 +116,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ComputeAllAnchorsQuantizedFixture : public ComputeAllAnchorsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(size_t num_anchors, const ComputeAnchorsInfo &info, DataType data_type, QuantizationInfo qinfo)
{
ComputeAllAnchorsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(num_anchors, info, data_type, qinfo);
diff --git a/tests/validation/fixtures/ConcatenateLayerFixture.h b/tests/validation/fixtures/ConcatenateLayerFixture.h
index bafb8b2392..3a021661ac 100644
--- a/tests/validation/fixtures/ConcatenateLayerFixture.h
+++ b/tests/validation/fixtures/ConcatenateLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,7 +50,6 @@ private:
using SrcITensorType = typename std::conditional<CI, const ITensorType, ITensorType>::type;
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, unsigned int axis)
{
// Create input shapes
diff --git a/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h b/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h
index ae844332c3..7ad14e1b40 100644
--- a/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h
+++ b/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ConvertFullyConnectedWeightsValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, unsigned int weights_w, DataLayout training_data_layout, DataType data_type)
{
const unsigned int height = input_shape.x() * input_shape.y() * input_shape.z();
@@ -61,7 +60,7 @@ protected:
{
case DataType::QASYMM8:
{
- std::uniform_int_distribution<uint8_t> distribution(0, 10);
+ std::uniform_int_distribution<uint32_t> distribution(0, 10);
library->fill(tensor, distribution, i);
break;
}
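
This fixture, and several others below, swap std::uniform_int_distribution<uint8_t>/<int8_t> for 32-bit distributions. std::uniform_int_distribution is only specified by the C++ standard for the standard integer types of at least short width, so instantiating it with an 8-bit type is undefined and rejected by some standard libraries. A small sketch of the resulting fill pattern (seed stands in for whatever seed the library uses):

    // Draw 8-bit test values through a 32-bit distribution and narrow afterwards.
    std::mt19937 gen(seed);
    std::uniform_int_distribution<uint32_t> distribution(0, 10);   // instead of <uint8_t>
    const uint8_t value = static_cast<uint8_t>(distribution(gen)); // range fits in 8 bits, so narrowing is safe
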
diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h
index 6dbf3d5731..0622e5e6f0 100644
--- a/tests/validation/fixtures/ConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/ConvolutionLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,12 +21,19 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_CONVOLUTION_LAYER_FIXTURE
-#define ARM_COMPUTE_TEST_CONVOLUTION_LAYER_FIXTURE
+
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_CONVOLUTIONLAYERFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_CONVOLUTIONLAYERFIXTURE_H
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/graph/Utils.h"
+#ifdef ARM_COMPUTE_OPENCL_ENABLED
+#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
+#endif // ARM_COMPUTE_OPENCL_ENABLED
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/arm_gemm/utils.hpp"
+#include "src/graph/mutators/MutatorUtils.h"
#include "tests/AssetsLibrary.h"
#include "tests/Globals.h"
#include "tests/IAccessor.h"
@@ -35,10 +42,12 @@
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/ActivationLayer.h"
#include "tests/validation/reference/ConvolutionLayer.h"
+#include "tests/validation/reference/PadLayer.h"
#include "tests/validation/reference/Permute.h"
#include "tests/validation/reference/Utils.h"
#include <random>
+#include <type_traits>
namespace arm_compute
{
@@ -49,13 +58,30 @@ namespace validation
namespace detail
{
template <typename ConvolutionFunction, typename TensorType>
-void configure_conv_function(ConvolutionFunction &func,
+#ifdef ARM_COMPUTE_OPENCL_ENABLED
+std::enable_if_t<!std::is_same<ConvolutionFunction, CLGEMMConvolutionLayer>::value, void>
+#else // ARM_COMPUTE_OPENCL_ENABLED
+void
+#endif // ARM_COMPUTE_OPENCL_ENABLED
+configure_conv_function(ConvolutionFunction &func,
+ TensorType *src, const TensorType *weights, const TensorType *bias, TensorType *dst,
+ const PadStrideInfo &info, const WeightsInfo &weights_info,
+ const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
+{
+ func.configure(src, weights, bias, dst, info, weights_info, dilation, act_info, false /* enable_fast_math */, num_groups);
+}
+
+#ifdef ARM_COMPUTE_OPENCL_ENABLED
+template <typename ConvolutionFunction, typename TensorType>
+std::enable_if_t<std::is_same<ConvolutionFunction, CLGEMMConvolutionLayer>::value, void>
+configure_conv_function(ConvolutionFunction &func,
TensorType *src, const TensorType *weights, const TensorType *bias, TensorType *dst,
const PadStrideInfo &info, const WeightsInfo &weights_info,
const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
func.configure(src, weights, bias, dst, info, weights_info, dilation, act_info, num_groups);
}
+#endif // ARM_COMPUTE_OPENCL_ENABLED
} // namespace detail
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW>
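
The detail namespace above now carries two configure_conv_function overloads selected via std::enable_if_t: CLGEMMConvolutionLayer::configure has no enable_fast_math parameter, so it gets a dedicated overload, while every other function type is passed an explicit false for fast math. Stripped of the #ifdef guards, the dispatch reduces to this sketch:

    // Compile-time dispatch on the convolution function type.
    template <typename Func, typename TensorType>
    std::enable_if_t<!std::is_same<Func, CLGEMMConvolutionLayer>::value, void>
    configure_conv_function(Func &func, TensorType *src, const TensorType *weights, const TensorType *bias, TensorType *dst,
                            const PadStrideInfo &info, const WeightsInfo &weights_info, const Size2D &dilation,
                            const ActivationLayerInfo &act_info, unsigned int num_groups)
    {
        func.configure(src, weights, bias, dst, info, weights_info, dilation, act_info, false /* enable_fast_math */, num_groups);
    }

    template <typename Func, typename TensorType>
    std::enable_if_t<std::is_same<Func, CLGEMMConvolutionLayer>::value, void>
    configure_conv_function(Func &func, TensorType *src, const TensorType *weights, const TensorType *bias, TensorType *dst,
                            const PadStrideInfo &info, const WeightsInfo &weights_info, const Size2D &dilation,
                            const ActivationLayerInfo &act_info, unsigned int num_groups)
    {
        func.configure(src, weights, bias, dst, info, weights_info, dilation, act_info, num_groups); // no fast-math flag
    }
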
@@ -66,25 +92,67 @@ public:
|| std::is_same<typename std::decay<T>::type, int8_t>::value,
int32_t, T >::type;
+ void setup_quantization(TensorShape input_shape, TensorShape weights_shape, QuantizationInfo &input_q_info,
+ QuantizationInfo &weights_q_info, DataType data_type)
+ {
+ const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+ const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+ std::mt19937 generator(library->seed() + _hash);
+ std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f);
+ std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+ const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+
+ const int32_t offset_lhs = distribution_t(generator);
+ const int32_t offset_rhs = distribution_t(generator);
+
+ _quantization_info = QuantizationInfo(scale_lhs, offset_lhs);
+ _weight_quantization_info = QuantizationInfo(scale_rhs, offset_rhs);
+
+ QuantizationHint q_hint = suggest_conv_dst_q_info_and_bias(input_q_info, weights_q_info,
+ weights_shape.y() /* height */, weights_shape.x() /* width */, input_shape.z() /* channels */,
+ data_type, 0.5f /* bias_fraction */);
+
+ _dst_q_info = q_hint.q_info;
+ _min_bias = q_hint.bias_min;
+ _max_bias = q_hint.bias_max;
+ }
+
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights,
DataType data_type, DataType weights_data_type, DataLayout data_layout, QuantizationInfo quantization_info, QuantizationInfo weight_quantization_info, ActivationLayerInfo act_info,
- bool mixed_layout = false)
+ bool mixed_layout = false, PaddingList pre_pad_layer = PaddingList({}), bool padded_weights = false)
{
+ // This hash is used by random generators. There may be hash collisions but
+ // this is intentional as it's a very easy way to make the current
+ // random generation process differ across the many test configurations
+ // which previously used the same set of values.
+ _hash = input_shape[0] + input_shape[1] + input_shape[2] + input_shape[3] +
+ + weights_shape[0] + weights_shape[1] + weights_shape[2] + weights_shape[3] +
+ mixed_layout + (data_type == DataType::QASYMM8_SIGNED) + (data_layout == DataLayout::NHWC);
+
_mixed_layout = mixed_layout;
_data_type = data_type;
_weights_data_type = weights_data_type;
- _is_quantized = is_data_type_quantized_asymmetric(data_type);
+ const bool is_quantized = is_data_type_quantized(weights_data_type);
_is_bfloat16 = data_type == DataType::BFLOAT16;
- _bias_data_type = _is_quantized ? DataType::S32 : (_is_bfloat16 ? DataType::F32 : data_type);
+ _bias_data_type = is_quantized ? DataType::S32 : (_is_bfloat16 ? DataType::F32 : data_type);
_output_data_type = _is_bfloat16 ? DataType::F32 : data_type;
_quantization_info = quantization_info;
_weight_quantization_info = weight_quantization_info;
_data_layout = data_layout;
+ _dst_q_info = quantization_info;
+
+ if(is_quantized && !is_data_type_quantized_symmetric(weights_data_type) && (!act_info.enabled() || act_info.activation() == ActivationFunction::IDENTITY))
+ {
+ setup_quantization(input_shape, weights_shape, _quantization_info, _weight_quantization_info, data_type);
+ _use_dynamic_output_quant = true;
+ }
- _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, dilation, act_info);
- _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation, act_info);
+ _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, dilation, act_info, pre_pad_layer, padded_weights);
+ _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation, act_info, pre_pad_layer);
}
protected:
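
setup_quantization() above draws the input and weight scales as random powers of two and the offsets uniformly over the full range of T, then lets suggest_conv_dst_q_info_and_bias() pick a destination QuantizationInfo and a bias range that keep the accumulation representable. A small numeric sketch of the scale generation (values are illustrative):

    // The exponent is uniform in [-5, 3], so the scale lies in [2^-5, 2^3] = [0.03125, 8.0].
    std::mt19937 generator(seed);                               // seed: library->seed() + _hash in the fixture
    std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f);
    const float exponent = distribution_float(generator);       // e.g. -2.3f
    const float scale    = std::pow(2.0f, exponent);            // e.g. ~0.2
    const QuantizationInfo q_info(scale, /* offset */ 0);       // offsets are drawn separately in the fixture
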
@@ -118,16 +186,34 @@ protected:
{
case DataType::QASYMM8:
{
- std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f);
- std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second);
- library->fill(tensor, distribution, i);
+ if(_use_dynamic_output_quant)
+ {
+ std::uniform_int_distribution<int32_t> distribution(0, 255);
+ library->fill(tensor, distribution, i);
+ }
+ else
+ {
+ // Legacy initialization in case the output quantization info can't be reliably estimated
+ std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f);
+ std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second);
+ library->fill(tensor, distribution, i);
+ }
break;
}
case DataType::QASYMM8_SIGNED:
{
- std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f);
- std::uniform_int_distribution<int8_t> distribution(bounds.first, bounds.second);
- library->fill(tensor, distribution, i);
+ if(_use_dynamic_output_quant)
+ {
+ std::uniform_int_distribution<int32_t> distribution(-128, 127);
+ library->fill(tensor, distribution, i);
+ }
+ else
+ {
+ // Legacy initialization in case the output quantization info can't be reliably estimated
+ std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f);
+ std::uniform_int_distribution<int32_t> distribution(bounds.first, bounds.second);
+ library->fill(tensor, distribution, i);
+ }
break;
}
case DataType::QSYMM8_PER_CHANNEL:
@@ -146,13 +232,13 @@ protected:
max_bound = bounds.second;
}
}
- std::uniform_int_distribution<int8_t> distribution(min_bound, max_bound);
+ std::uniform_int_distribution<int32_t> distribution(min_bound, max_bound);
library->fill(tensor, distribution, i);
break;
}
case DataType::S32:
{
- std::uniform_int_distribution<int32_t> distribution(-100, 100);
+ std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias);
library->fill(tensor, distribution, i);
break;
}
@@ -179,8 +265,9 @@ protected:
}
}
+ // The given input is in NCHW format
TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const PadStrideInfo &info,
- bool reshape_weights, const Size2D &dilation, const ActivationLayerInfo act_info)
+ bool reshape_weights, const Size2D &dilation, const ActivationLayerInfo act_info, PaddingList pre_pad_layer = PaddingList({}), bool padded_weights = false)
{
ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0);
@@ -191,6 +278,18 @@ protected:
permute(input_shape, PermutationVector(2U, 0U, 1U));
permute(weights_shape, PermutationVector(2U, 0U, 1U));
permute(output_shape, PermutationVector(2U, 0U, 1U));
+
+ if(pre_pad_layer.size() > 0)
+ {
+ // make sure paddings exist for each of the C, H, W dimensions
+ for(unsigned int i = 0; i < 3 - pre_pad_layer.size(); ++i)
+ {
+ pre_pad_layer.push_back({ 0, 0 });
+ }
+
+ // rotate padding info from nchw to nhwc
+ std::rotate(pre_pad_layer.begin(), pre_pad_layer.begin() + 2, pre_pad_layer.begin() + 3);
+ }
}
const int idx_width = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
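
The pre_pad_layer handling above pads the list to three entries and then rotates it, because the per-dimension padding indices follow the tensor layout: in NCHW the dimensions run {W, H, C}, in NHWC they run {C, W, H}. A worked example of the rotation:

    // {pad_W, pad_H, pad_C} in NCHW dimension order ...
    PaddingList pads = { { 1, 1 }, { 2, 2 }, { 0, 0 } };
    // ... rotate so that the C entry comes first, matching NHWC dimension order {C, W, H}:
    std::rotate(pads.begin(), pads.begin() + 2, pads.begin() + 3);
    // pads == { {0, 0}, {1, 1}, {2, 2} }, i.e. {pad_C, pad_W, pad_H}
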
@@ -202,19 +301,47 @@ protected:
// Create tensors
TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, _quantization_info, _data_layout);
TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _weights_data_type, 1, _weight_quantization_info, _data_layout);
- TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, _quantization_info, _data_layout);
- TensorType dst = create_tensor<TensorType>(output_shape, _output_data_type, 1, _quantization_info, _data_layout);
+ TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, QuantizationInfo() /*bias is not a quantized type*/, _data_layout);
+ TensorType dst = create_tensor<TensorType>(output_shape, _output_data_type, 1, _dst_q_info, _data_layout);
// Create and configure function
FunctionType conv;
- detail::configure_conv_function(conv, &src, &weights, &bias, &dst, info, weights_info, dilation, act_info, num_groups);
+
+ const unsigned int height_index = arm_compute::graph::get_dimension_idx(_data_layout, DataLayoutDimension::HEIGHT);
+ const unsigned int width_index = arm_compute::graph::get_dimension_idx(_data_layout, DataLayoutDimension::WIDTH);
+
+ const PaddingInfo pad_w = width_index < pre_pad_layer.size() ? pre_pad_layer[width_index] : PaddingInfo(0, 0);
+ const PaddingInfo pad_h = height_index < pre_pad_layer.size() ? pre_pad_layer[height_index] : PaddingInfo(0, 0);
+
+ if(pre_pad_layer.size() > 0 && arm_compute::graph::is_padding_in_height_or_width(_data_layout, pre_pad_layer))
+ {
+ // this is the logic implemented in NodeFusionMutator -> fuse_pad_with_convolution
+ const PadStrideInfo new_conv_info(
+ info.stride().first,
+ info.stride().second,
+ info.pad_left() + pad_w.first,
+ info.pad_right() + pad_w.second,
+ info.pad_top() + pad_h.first,
+ info.pad_bottom() + pad_h.second,
+ info.round());
+ detail::configure_conv_function(conv, &src, &weights, &bias, &dst, new_conv_info, weights_info, dilation, act_info, num_groups);
+ }
+ else
+ {
+ detail::configure_conv_function(conv, &src, &weights, &bias, &dst, info, weights_info, dilation, act_info, num_groups);
+ }
ARM_COMPUTE_ASSERT(src.info()->is_resizable());
ARM_COMPUTE_ASSERT(weights.info()->is_resizable());
ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
-
- add_padding_x({ &src, &weights, &bias, &dst }, _data_layout);
+ // Test "add padding after configure" behavior. This behavior should not affect the correctness
+ add_padding_x({ &src, &bias, &dst }, _data_layout);
+ // Padding weights may affect code path in some backends
+ if (padded_weights)
+ {
+ add_padding_x({ &weights }, _data_layout);
+ }
// Allocate tensors
src.allocator()->allocate();
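
When the pre-pad only affects height and/or width, the fixture folds it into the convolution's PadStrideInfo, mirroring fuse_pad_with_convolution in NodeFusionMutator. A worked numeric example of that fold (values are illustrative):

    // Conv pads {left, right, top, bottom} = {1, 1, 1, 1} preceded by an explicit pad of {2, 2} in W
    // and {0, 1} in H is equivalent to a single conv with pads {3, 3, 1, 2}.
    const PadStrideInfo info(1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR);
    const PaddingInfo   pad_w(2, 2), pad_h(0, 1);
    const PadStrideInfo fused(info.stride().first, info.stride().second,
                              info.pad_left() + pad_w.first, info.pad_right() + pad_w.second,
                              info.pad_top() + pad_h.first, info.pad_bottom() + pad_h.second,
                              info.round());
    // fused: pad_left = 3, pad_right = 3, pad_top = 1, pad_bottom = 2
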
@@ -228,9 +355,9 @@ protected:
ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
- fill(AccessorType(src), 0);
- fill(AccessorType(weights), 1);
- fill(AccessorType(bias), 2);
+ fill(AccessorType(src), 0 + _hash);
+ fill(AccessorType(weights), 1 + _hash);
+ fill(AccessorType(bias), 2 + _hash);
if(_mixed_layout)
{
@@ -246,7 +373,7 @@ protected:
}
SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
- const Size2D &dilation, const ActivationLayerInfo act_info)
+ const Size2D &dilation, const ActivationLayerInfo act_info, PaddingList pre_pad_layer = PaddingList({}))
{
ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0);
@@ -262,9 +389,9 @@ protected:
SimpleTensor<TW> weights{ weights_shape, weights_dt, 1, _weight_quantization_info };
SimpleTensor<TBias> bias{ bias_shape, bias_dt, 1, _quantization_info };
- fill(src, 0);
- fill(weights, 1);
- fill(bias, 2);
+ fill(src, 0 + _hash);
+ fill(weights, 1 + _hash);
+ fill(bias, 2 + _hash);
// Fill with bfloat16 to perform the conversion and reduce the mismatches in the output
if(_is_bfloat16)
@@ -273,9 +400,14 @@ protected:
regularize_values(static_cast<void *>(weights.data()), weights.num_elements());
}
- return (act_info.enabled()) ? reference::activation_layer<T>(reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation, num_groups),
+ if(pre_pad_layer.size() > 0)
+ {
+ src = reference::pad_layer<T>(src, pre_pad_layer, PixelValue(0), PaddingMode::CONSTANT);
+ }
+
+ return (act_info.enabled()) ? reference::activation_layer<T>(reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation, num_groups, _dst_q_info),
act_info) :
- reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation, num_groups);
+ reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation, num_groups, _dst_q_info);
}
TensorType _target{};
@@ -287,16 +419,19 @@ protected:
DataLayout _data_layout{};
QuantizationInfo _quantization_info{};
QuantizationInfo _weight_quantization_info{};
- bool _is_quantized = false;
+ QuantizationInfo _dst_q_info{};
bool _is_bfloat16 = false;
bool _mixed_layout = false;
+ bool _use_dynamic_output_quant{false};
+ int32_t _hash{0};
+ int32_t _min_bias{-100};
+ int32_t _max_bias{100};
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
class ConvolutionValidationFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type,
DataLayout data_layout, ActivationLayerInfo act_info)
{
@@ -307,10 +442,35 @@ public:
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
+class ConvolutionValidationPaddedWeightsFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
+{
+public:
+ void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type,
+ DataLayout data_layout)
+ {
+ ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights,
+ data_type, data_type, data_layout,
+ QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), mixed_layout, PaddingList({}), true);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
+class ConvolutionValidationWithPaddingFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
+{
+public:
+ void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type,
+ DataLayout data_layout, ActivationLayerInfo act_info, PaddingList pre_pad_layer = PaddingList({}))
+ {
+ ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights,
+ data_type, data_type, data_layout,
+ QuantizationInfo(), QuantizationInfo(), act_info, mixed_layout, pre_pad_layer);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
class ConvolutionValidationQuantizedFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type,
DataLayout data_layout, QuantizationInfo quantization_info, ActivationLayerInfo act_info)
{
@@ -323,7 +483,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ConvolutionValidationQuantizedPerChannelFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW>
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type,
DataLayout data_layout, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataType weights_data_type)
{
@@ -339,7 +498,311 @@ public:
quantization_info, QuantizationInfo(weights_scales), act_info);
}
};
+
+
+#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
+inline TensorInfo prepare_weights(const TensorInfo tensor_info, const arm_compute::WeightFormat weight_format)
+{
+ const DataLayout data_layout = tensor_info.data_layout();
+ ARM_COMPUTE_EXPECT(data_layout == DataLayout::NHWC, framework::LogLevel::ERRORS);
+ const DataType data_type = tensor_info.data_type();
+ const TensorShape tensor_shape = tensor_info.tensor_shape();
+ const int N = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES)]; // N=O
+ const int H = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)];
+ const int W = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)];
+ const int C = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)]; // C=I
+
+ const int interleave_by = arm_compute::interleave_by(weight_format);
+ const int block_by = arm_compute::block_by(weight_format);
+ const int Ip = arm_gemm::roundup<unsigned int>(C, block_by); // C'=I'
+ const int Op = arm_gemm::roundup<unsigned int>(N, interleave_by); // O'=N'
+
+ arm_compute::Strides strides_in_bytes = tensor_info.strides_in_bytes();
+ strides_in_bytes.set(1, Ip * interleave_by * H * W * tensor_info.element_size());
+ strides_in_bytes.set(2, Ip * Op * tensor_info.element_size());
+
+ const size_t offset_first_element_in_bytes = tensor_info.offset_first_element_in_bytes();
+
+ // Total size needs to include padded dimensions
+ const size_t total_size_in_bytes = Op * H * W * Ip * tensor_info.element_size();
+
+ const TensorShape TS(Ip, W, H, Op);
+
+ TensorInfo new_tensor_info = tensor_info;
+ new_tensor_info.init(TS, 1 /*num_channels, deprecated*/, data_type, strides_in_bytes,
+ offset_first_element_in_bytes, total_size_in_bytes);
+ return new_tensor_info;
+}
+
+template <typename ScalarType, typename AccessorType>
+inline void rearrange_data(const AccessorType src, AccessorType dst, const arm_compute::WeightFormat weight_format)
+{
+ ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format(weight_format), framework::LogLevel::ERRORS);
+ // Data Layout: OHWIo<interleave_by>i<block_by>
+ const int interleave_by = arm_compute::interleave_by(weight_format);
+ const int block_by = arm_compute::block_by(weight_format);
+ const TensorShape src_tensor_shape = src.shape();
+ const DataLayout data_layout = src.data_layout();
+ ARM_COMPUTE_EXPECT(data_layout == DataLayout::NHWC, framework::LogLevel::ERRORS);
+ const unsigned int O = src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES)]; // N=O
+ const unsigned int H = src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)];
+ const unsigned int W = src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)];
+ const unsigned int I = src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)]; // C=I
+ const unsigned int Ip = arm_gemm::roundup<unsigned int>(I, block_by); // C'=I'
+ const unsigned int Op = arm_gemm::roundup<unsigned int>(O, interleave_by); // N'=O'
+
+ ARM_COMPUTE_EXPECT_EQUAL(Op * H * W * Ip, (unsigned)dst.num_elements(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(src.num_elements() <= dst.num_elements(), framework::LogLevel::ERRORS);
+
+ const ScalarType *src_ptr = reinterpret_cast<const ScalarType *>(src.data());
+ ScalarType *dst_ptr = reinterpret_cast<ScalarType *>(dst.data());
+ for(unsigned i = 0; i < I; ++i)
+ for(unsigned w = 0; w < W; ++w)
+ for(unsigned h = 0; h < H; ++h)
+ for(unsigned o = 0; o < O; ++o)
+ {
+ ScalarType src_element;
+ switch(data_layout)
+ {
+ case DataLayout::NHWC:
+ {
+ src_element = src_ptr[o * H * W * I + h * W * I + w * I + i];
+ }
+ break;
+ default:
+ {
+ ARM_COMPUTE_ERROR("Unsupported memory layout.");
+ }
+ }
+ const int x5 = std::floor(((float)o) / interleave_by);
+ const int x4 = h;
+ const int x3 = w;
+ const int x2 = std::floor((float)i / block_by);
+ const int x1 = o % interleave_by;
+ const int x0 = i % block_by;
+ unsigned dst_idx = x5 * H * W * Ip * interleave_by
+ + x4 * W * Ip * interleave_by
+ + x3 * Ip * interleave_by
+ + x2 * interleave_by * block_by
+ + x1 * block_by
+ + x0;
+ dst_ptr[dst_idx] = src_element;
+ }
+}
+
+template <typename ConvolutionFunction, typename TensorClass, typename AccessorType, typename ScalarType, bool enable_fast_math>
+class VariableWeightsFixtureBaseClass : public framework::Fixture
+{
+public:
+ void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataLayout data_layout,
+ const DataType data_type)
+ {
+ conv = std::make_unique<ConvolutionFunction>();
+ // prepare data
+ _data_layout = data_layout;
+ // Fixed format kernels for variable weights can work only with NHWC format.
+ ARM_COMPUTE_EXPECT_EQUAL(_data_layout, DataLayout::NHWC, framework::LogLevel::ERRORS);
+ _data_type = data_type;
+ // run the code
+ compute_target(input_shape, weights_shape, bias_shape, output_shape, info, dilation);
+ compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation);
+ }
+ void teardown()
+ {
+ _target.allocator()->free();
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ switch(tensor.data_type())
+ {
+ case DataType::F16:
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::F32:
+ {
+ std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ default:
+ library->fill_tensor_uniform(tensor, i);
+ }
+ }
+
+private:
+ virtual void configure_and_execute_kernel(TensorInfo src_tensor_info, TensorInfo weight_tensor_info, TensorInfo bias_tensor_info, TensorInfo dst_tensor_info, const WeightsInfo weights_info,
+ const PadStrideInfo &conv_info,
+ const Size2D &dilation) = 0;
+
+ void compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const PadStrideInfo &conv_info,
+ const Size2D &dilation)
+ {
+ // The dataset is always in NCHW format - we need to make C the
+ // innermost dimension because the fixed-format kernels work only
+ // with NHWC layout.
+ permute(input_shape, PermutationVector(2U, 0U, 1U));
+ permute(weights_shape, PermutationVector(2U, 0U, 1U));
+ permute(output_shape, PermutationVector(2U, 0U, 1U));
+ const auto src_tensor_info = TensorInfo(input_shape, 1, _data_type, _data_layout);
+ const auto weight_tensor_info = TensorInfo(weights_shape, 1, _data_type, _data_layout);
+ const auto bias_tensor_info = TensorInfo(bias_shape, 1, _data_type, _data_layout);
+ auto dst_tensor_info = TensorInfo(output_shape, 1, _data_type, _data_layout);
+
+ const int kernel_height = weights_shape[get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT)];
+ const int kernel_width = weights_shape[get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH)];
+ const int num_kernels = weights_shape[get_data_layout_dimension_index(_data_layout, DataLayoutDimension::BATCHES)];
+
+ const WeightsInfo query_weights_info(/*reshape_weights*/ false, kernel_width, kernel_height, num_kernels, false, arm_compute::WeightFormat::ANY);
+ const bool kernel_found = bool(ConvolutionFunction::has_opt_impl(_computed_weight_format, &src_tensor_info, &weight_tensor_info,
+ &bias_tensor_info, &dst_tensor_info, conv_info, query_weights_info));
+ // Make sure that the setup finds a fixed-format kernel as requested by the test case.
+ ARM_COMPUTE_EXPECT(kernel_found, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format(_computed_weight_format), framework::LogLevel::ERRORS);
+
+ const WeightsInfo weights_info(/*reshape_weights*/ false, kernel_width, kernel_height, num_kernels, false, _computed_weight_format);
+ configure_and_execute_kernel(src_tensor_info, weight_tensor_info, bias_tensor_info, dst_tensor_info, weights_info, conv_info,
+ dilation);
+ }
+ void compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
+ const Size2D &dilation)
+ {
+ ARM_COMPUTE_UNUSED(input_shape, weights_shape, bias_shape, output_shape, info,
+ dilation);
+
+ // Create reference
+ SimpleTensor<ScalarType> src{ input_shape, _data_type };
+ SimpleTensor<ScalarType> weights{ weights_shape, _data_type };
+ SimpleTensor<ScalarType> bias{ bias_shape, _data_type };
+ fill(src, 0);
+ fill(bias, 1);
+ fill(weights, 3);
+ _reference = reference::convolution_layer<ScalarType>(src, weights, bias, output_shape, info, dilation, 1 /*num_groups*/);
+ }
+ DataLayout _data_layout{};
+ DataType _data_type{};
+
+protected:
+ std::unique_ptr<ConvolutionFunction> conv{};
+ arm_compute::WeightFormat _computed_weight_format{ arm_compute::WeightFormat::UNSPECIFIED };
+ TensorClass _target{};
+ SimpleTensor<ScalarType> _reference{};
+};
+
+template <typename ConvolutionFunction, typename TensorClass, typename AccessorType, typename ScalarType, bool enable_fast_math>
+class VariableWeightsFixture : public VariableWeightsFixtureBaseClass<ConvolutionFunction, TensorClass, AccessorType, ScalarType, enable_fast_math>
+{
+ void configure_and_execute_kernel(TensorInfo src_tensor_info, TensorInfo weight_tensor_info, TensorInfo bias_tensor_info, TensorInfo dst_tensor_info, const WeightsInfo weights_info,
+ const PadStrideInfo &conv_info,
+ const Size2D &dilation)
+ {
+ this->conv->configure(&src_tensor_info, &weight_tensor_info, &bias_tensor_info, &dst_tensor_info, conv_info, weights_info, dilation, ActivationLayerInfo(), enable_fast_math);
+
+ // Allocate input tensors
+ auto src = create_tensor<TensorClass>(src_tensor_info);
+ auto weights_original = create_tensor<TensorClass>(weight_tensor_info);
+ const TensorInfo new_tensor_info = prepare_weights(weight_tensor_info, this->_computed_weight_format);
+ auto weights_transformed = create_tensor<TensorClass>(new_tensor_info);
+ auto bias = create_tensor<TensorClass>(bias_tensor_info);
+ src.allocator()->allocate();
+ weights_original.allocator()->allocate();
+ weights_transformed.allocator()->allocate();
+ bias.allocator()->allocate();
+ // Allocate destination tensor
+ this->_target = create_tensor<TensorClass>(dst_tensor_info);
+ this->_target.allocator()->allocate();
+
+ // Prepare source and biases that are left unchanged.
+ this->fill(AccessorType(src), 0);
+ this->fill(AccessorType(bias), 1);
+
+ // First run
+ this->fill(AccessorType(weights_original), 2);
+ rearrange_data<ScalarType, AccessorType>(AccessorType(weights_original), AccessorType(weights_transformed), this->_computed_weight_format);
+ ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weights_transformed }, { TensorType::ACL_SRC_2, &bias }, { TensorType::ACL_DST, &(this->_target) } };
+ this->conv->run(run_pack);
+ // Second run, with new weights
+ this->fill(AccessorType(weights_original), 3);
+ rearrange_data<ScalarType, AccessorType>(AccessorType(weights_original), AccessorType(weights_transformed), this->_computed_weight_format);
+ this->conv->run(run_pack);
+ src.allocator()->free();
+ weights_original.allocator()->free();
+ weights_transformed.allocator()->free();
+ bias.allocator()->free();
+ }
+};
+
+template <typename ConvolutionFunction, typename TensorClass, typename AccessorType, typename ScalarType, bool enable_fast_math>
+class VariableWeightsFixtureNEInterface : public VariableWeightsFixtureBaseClass<ConvolutionFunction, TensorClass, AccessorType, ScalarType, enable_fast_math>
+{
+ void configure_and_execute_kernel(TensorInfo src_tensor_info, TensorInfo weight_tensor_info, TensorInfo bias_tensor_info, TensorInfo dst_tensor_info, const WeightsInfo weights_info,
+ const PadStrideInfo &conv_info,
+ const Size2D &dilation)
+ {
+ // Allocate input tensors
+ auto src = create_tensor<TensorClass>(src_tensor_info);
+ auto weights_original = create_tensor<TensorClass>(weight_tensor_info);
+ const TensorInfo new_tensor_info = prepare_weights(weight_tensor_info, this->_computed_weight_format);
+ auto weights_transformed = create_tensor<TensorClass>(new_tensor_info);
+ auto bias = create_tensor<TensorClass>(bias_tensor_info);
+ src.allocator()->allocate();
+ weights_original.allocator()->allocate();
+ weights_transformed.allocator()->allocate();
+ bias.allocator()->allocate();
+ // Allocate destination tensor
+ this->_target = create_tensor<TensorClass>(dst_tensor_info);
+ this->_target.allocator()->allocate();
+ this->conv->configure(&src, &weights_transformed, &bias, &(this->_target), conv_info, weights_info, dilation, ActivationLayerInfo(), enable_fast_math);
+ // Prepare source and biases that are left unchanged.
+ this->fill(AccessorType(src), 0);
+ this->fill(AccessorType(bias), 1);
+
+ // First run
+ this->fill(AccessorType(weights_original), 2);
+ rearrange_data<ScalarType, AccessorType>(AccessorType(weights_original), AccessorType(weights_transformed), this->_computed_weight_format);
+ this->conv->run();
+ // Second run, with new weights
+ this->fill(AccessorType(weights_original), 3);
+ rearrange_data<ScalarType, AccessorType>(AccessorType(weights_original), AccessorType(weights_transformed), this->_computed_weight_format);
+ this->conv->run();
+ src.allocator()->free();
+ weights_original.allocator()->free();
+ weights_transformed.allocator()->free();
+ bias.allocator()->free();
+ }
+};
+
+template <typename ConvolutionClass, bool enable_fast_math>
+class HasOptImplFixture : public framework::Fixture
+{
+public:
+ void setup(DataType data_type, arm_compute::WeightFormat query_weight_format)
+ {
+ auto conv = std::make_unique<ConvolutionClass>();
+ const auto src_info = TensorInfo(TensorShape(56U, 56U, 64U), 1, data_type, DataLayout::NHWC);
+ const auto weight_info = TensorInfo(TensorShape(64, 3U, 3U, 64U), 1, enable_fast_math ? DataType::BFLOAT16 : data_type, DataLayout::NHWC);
+ const auto bias_info = TensorInfo(TensorShape(64U), 1, data_type, DataLayout::NHWC);
+ auto dst_info = TensorInfo(TensorShape(56U, 56U, 64U), 1, data_type, DataLayout::NHWC);
+ const auto conv_info = PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR);
+ const WeightsInfo weights_info(false, 3U, 3U, 64U, false, query_weight_format);
+ _kernel_found = bool(ConvolutionClass::has_opt_impl(_computed_weight_format, &src_info, &weight_info,
+ &bias_info, &dst_info, conv_info, weights_info,
+ Size2D(1U, 1U) /*dilation*/, ActivationLayerInfo() /*act_info*/, enable_fast_math));
+ }
+
+protected:
+ bool _kernel_found{ false };
+ arm_compute::WeightFormat _computed_weight_format{ arm_compute::WeightFormat::UNSPECIFIED };
+};
+#endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
+
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_CONVOLUTION_LAYER_FIXTURE */
+
+#endif // ACL_TESTS_VALIDATION_FIXTURES_CONVOLUTIONLAYERFIXTURE_H
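
prepare_weights() and rearrange_data() above lay the weights out as OHWIo<interleave_by>i<block_by>: output channels are grouped in blocks of interleave_by, input channels in blocks of block_by, and both are padded up to Op and Ip. The destination index can be restated as a single expression; the helper below is an equivalent rewrite with a worked example, not part of the fixture:

    // Same index arithmetic as rearrange_data(); H, W and Ip are the (padded) destination sizes.
    inline unsigned ohwi_block_index(unsigned o, unsigned h, unsigned w, unsigned i,
                                     unsigned H, unsigned W, unsigned Ip,
                                     unsigned interleave_by, unsigned block_by)
    {
        const unsigned x5 = o / interleave_by, x1 = o % interleave_by; // outer / inner output-channel index
        const unsigned x2 = i / block_by,      x0 = i % block_by;      // outer / inner input-channel index
        return ((x5 * H * W + h * W + w) * Ip + x2 * block_by) * interleave_by + x1 * block_by + x0;
    }
    // Example: interleave_by = 8, block_by = 4 -> element (o = 10, i = 5) maps to output-channel
    // group x5 = 1, slot x1 = 2 and input-channel group x2 = 1, slot x0 = 1.
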
diff --git a/tests/validation/fixtures/CopyFixture.h b/tests/validation/fixtures/CopyFixture.h
index eca75e7b69..f5e711a500 100644
--- a/tests/validation/fixtures/CopyFixture.h
+++ b/tests/validation/fixtures/CopyFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class CopyFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type)
{
_target = compute_target(input_shape, output_shape, data_type);
diff --git a/tests/validation/fixtures/CropResizeFixture.h b/tests/validation/fixtures/CropResizeFixture.h
index 5aa63c4ef8..30a3fd8569 100644
--- a/tests/validation/fixtures/CropResizeFixture.h
+++ b/tests/validation/fixtures/CropResizeFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class CropResizeFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape src_shape, TensorShape boxes_shape, Coordinates2D crop_size, InterpolationPolicy method,
float extrapolation_value, bool is_outside_bounds, DataType data_type)
{
diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h
index 14f071eed0..83170c413c 100644
--- a/tests/validation/fixtures/DeconvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,6 @@ public:
using TBias = typename std::conditional < std::is_same<typename std::decay<T>::type, uint8_t>::value || std::is_same<typename std::decay<T>::type, int8_t>::value, int32_t, T >::type;
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
DataType data_type, DataType weights_data_type, DataLayout data_layout,
QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, QuantizationInfo weights_quantization_info, bool add_bias)
@@ -75,14 +74,14 @@ protected:
case DataType::QASYMM8:
{
std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f);
- std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second);
+ std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second);
library->fill(tensor, distribution, i);
break;
}
case DataType::QASYMM8_SIGNED:
{
std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f);
- std::uniform_int_distribution<int8_t> distribution(bounds.first, bounds.second);
+ std::uniform_int_distribution<int32_t> distribution(bounds.first, bounds.second);
library->fill(tensor, distribution, i);
break;
}
@@ -102,7 +101,7 @@ protected:
max_bound = bounds.second;
}
}
- std::uniform_int_distribution<int8_t> distribution(min_bound, max_bound);
+ std::uniform_int_distribution<int32_t> distribution(min_bound, max_bound);
library->fill(tensor, distribution, i);
break;
}
@@ -246,11 +245,9 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DeconvolutionValidationFixture : public DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady,
unsigned int num_kernels, DataType data_type, DataLayout data_layout, bool add_bias)
{
- ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported");
const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels);
const TensorShape bias_shape(num_kernels);
const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL);
@@ -267,11 +264,9 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DeconvolutionValidationAsymmFixture : public DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int pad_left, unsigned int pad_right, unsigned int pad_top,
unsigned int pad_bottom, unsigned int num_kernels, DataType data_type, DataLayout data_layout, bool add_bias)
{
- ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported");
const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels);
const TensorShape bias_shape(num_kernels);
const PadStrideInfo info(sx, sy, pad_left, pad_right, pad_top, pad_bottom, DimensionRoundingType::CEIL);
@@ -288,11 +283,9 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DeconvolutionValidationQuantizedFixture : public DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady,
unsigned int num_kernels, DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias)
{
- ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported");
const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels);
const TensorShape bias_shape(num_kernels);
const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL);
@@ -310,12 +303,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DeconvolutionValidationQuantizedPerChannelFixture : public DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T, TW>
{
public:
- template <typename...>
void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady,
unsigned int num_kernels, DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias,
DataType weights_data_type)
{
- ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported");
const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels);
const TensorShape bias_shape(num_kernels);
const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL);
diff --git a/tests/validation/fixtures/DepthConvertLayerFixture.h b/tests/validation/fixtures/DepthConvertLayerFixture.h
index 130b583dc1..f55d20bf3e 100644
--- a/tests/validation/fixtures/DepthConvertLayerFixture.h
+++ b/tests/validation/fixtures/DepthConvertLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DepthConvertLayerValidationBaseFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, QuantizationInfo quantization_info)
{
_shift = shift;
@@ -61,7 +60,7 @@ protected:
if(is_data_type_quantized(tensor.data_type()))
{
std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f);
- std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second);
+ std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second);
library->fill(tensor, distribution, i);
}
@@ -130,7 +129,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DepthConvertLayerValidationFixture : public DepthConvertLayerValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2>
{
public:
- template <typename...>
void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift)
{
DepthConvertLayerValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy,
@@ -142,7 +140,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DepthConvertLayerValidationQuantizedFixture : public DepthConvertLayerValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2>
{
public:
- template <typename...>
void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, QuantizationInfo quantization_info)
{
DepthConvertLayerValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy,
diff --git a/tests/validation/fixtures/DepthToSpaceLayerFixture.h b/tests/validation/fixtures/DepthToSpaceLayerFixture.h
index b7f5552474..abe3d8b22f 100644
--- a/tests/validation/fixtures/DepthToSpaceLayerFixture.h
+++ b/tests/validation/fixtures/DepthToSpaceLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DepthToSpaceLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, int32_t block_shape, TensorShape output_shape, DataType data_type, DataLayout data_layout)
{
_target = compute_target(input_shape, block_shape, output_shape, data_type, data_layout);
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index 2c943735ca..6e2e3a3846 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
-#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DEPTHWISECONVOLUTIONLAYERFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DEPTHWISECONVOLUTIONLAYERFIXTURE_H
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
@@ -38,6 +38,7 @@
#include "utils/Utils.h"
+#include <cstdint>
#include <random>
namespace arm_compute
@@ -54,25 +55,61 @@ class DepthwiseConvolutionLayerValidationGenericFixture : public framework::Fixt
public:
using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type;
+ void setup_quantization(TensorShape input_shape, TensorShape weights_shape, QuantizationInfo &input_q_info,
+ QuantizationInfo &weights_q_info, DataType data_type)
+ {
+ ARM_COMPUTE_UNUSED(input_shape);
+ const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+ const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+ std::mt19937 generator(library->seed() + _hash);
+ std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f);
+ std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+ const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+
+ const int32_t offset_lhs = distribution_t(generator);
+ const int32_t offset_rhs = distribution_t(generator);
+
+ _input_quantization_info = QuantizationInfo(scale_lhs, offset_lhs);
+ _weights_quantization_info = QuantizationInfo(scale_rhs, offset_rhs);
+
+ QuantizationHint q_hint = suggest_conv_dst_q_info_and_bias(input_q_info, weights_q_info,
+ weights_shape.y() /* height */, weights_shape.x() /* width */, 1 /* channels */,
+ data_type, 0.5f /* bias_fraction */);
+
+ _output_quantization_info = q_hint.q_info;
+ _min_bias = q_hint.bias_min;
+ _max_bias = q_hint.bias_max;
+ }
+
public:
- template <typename...>
void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation,
unsigned int depth_multiplier, DataType input_data_type, DataType weights_data_type,
QuantizationInfo input_quantization_info, QuantizationInfo weights_quantization_info, QuantizationInfo output_quantization_info,
- DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false)
+ DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false, bool run_twice = false)
{
+ ARM_COMPUTE_ERROR_ON(mixed_layout && in_place);
+ // This hash is used to seed the random generators. Hash collisions may occur, but
+ // this is intentional, as it is a very easy way to make the random generation
+ // process differ across most test configurations, which previously all used
+ // the same set of values. (A standalone sketch of this seeding follows this file's diff.)
+ _hash = in_shape[0] + in_shape[1] + in_shape[2] + in_shape[3] +
+ kernel_size.width + kernel_size.height + dilation.x() +
+ dilation.y() + pad_stride_info.pad_bottom() + pad_stride_info.pad_left() + pad_stride_info.pad_right() + pad_stride_info.pad_top();
+
_mixed_layout = mixed_layout;
_input_shape = in_shape;
_input_data_type = input_data_type;
_weights_data_type = weights_data_type;
- _input_quantization_info = input_quantization_info;
- _weights_quantization_info = weights_quantization_info;
- _output_quantization_info = output_quantization_info;
_data_layout = data_layout;
_pad_stride_info = pad_stride_info;
_act_info = act_info;
_depth_multiplier = depth_multiplier;
_dilation = dilation;
+ _in_place = in_place;
+ _run_twice = run_twice;
_bias_data_type = is_data_type_quantized(_input_data_type) ? DataType::S32 : _input_data_type;
@@ -85,6 +122,16 @@ public:
_weights_shape.set(2, _output_shape.z());
_biases_shape = TensorShape(_weights_shape[2]);
+
+ _input_quantization_info = input_quantization_info;
+ _weights_quantization_info = weights_quantization_info;
+ _output_quantization_info = output_quantization_info;
+
+ if(is_data_type_quantized(_input_data_type) && !is_data_type_quantized_symmetric(weights_data_type) && (!act_info.enabled() || act_info.activation() == ActivationFunction::IDENTITY))
+ {
+ setup_quantization(in_shape, _weights_shape, _input_quantization_info, _weights_quantization_info, _input_data_type);
+ _use_dynamic_output_quant = true;
+ }
}
void configure_target()
@@ -101,13 +148,28 @@ public:
}
// Create tensors
- _src = create_tensor<TensorType>(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout);
- _weights = create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout);
- _biases = create_tensor<TensorType>(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout);
- _target = create_tensor<TensorType>(output_shape, _input_data_type, 1, _output_quantization_info, _data_layout);
+ _src = create_tensor<TensorType>(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout);
+ _weights = create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout);
+ if(_run_twice) {
+ _weights.info()->set_are_values_constant(false);
+ }
+ _biases = create_tensor<TensorType>(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout);
+ TensorType *target_to_use = nullptr;
+ if(!_in_place)
+ {
+ _target = create_tensor<TensorType>(output_shape, _input_data_type, 1, _output_quantization_info, _data_layout);
+ target_to_use = &_target;
+ }
+
+ add_padding_x({ &_src, &_biases }, _data_layout);
+ add_padding_x({ &_weights }, _data_layout, true);
+ if(!_in_place)
+ {
+ add_padding_x({ &_target }, _data_layout);
+ }
// Create Depthwise Convolution configure function
- _dwc.configure(&_src, &_weights, &_biases, &_target, _pad_stride_info, _depth_multiplier, _act_info, _dilation);
+ _dwc.configure(&_src, &_weights, &_biases, target_to_use, _pad_stride_info, _depth_multiplier, _act_info, _dilation);
ARM_COMPUTE_ASSERT(_src.info()->is_resizable());
ARM_COMPUTE_ASSERT(_weights.info()->is_resizable());
@@ -117,24 +179,35 @@ public:
void allocate_and_run_target()
{
- // TODO: uncomment after COMPMID-4361
- // add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout);
-
// Allocate tensors
_src.allocator()->allocate();
_weights.allocator()->allocate();
_biases.allocator()->allocate();
- _target.allocator()->allocate();
ARM_COMPUTE_ASSERT(!_src.info()->is_resizable());
ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable());
ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable());
- ARM_COMPUTE_ASSERT(!_target.info()->is_resizable());
+
+ if(!_in_place)
+ {
+ _target.allocator()->allocate();
+ ARM_COMPUTE_ASSERT(!_target.info()->is_resizable());
+ }
// Fill tensors
- fill(AccessorType(_src), 0);
- fill(AccessorType(_weights), 1);
- fill(AccessorType(_biases), 2);
+ fill(AccessorType(_src), 0 + _hash);
+ fill(AccessorType(_weights), 1 + _hash);
+ fill(AccessorType(_biases), 2 + _hash);
+
+ // Run with variable input
+ if(_run_twice) {
+ _dwc.run();
+
+ // Fill tensors with a new seed
+ fill(AccessorType(_src), 3 + _hash);
+ fill(AccessorType(_weights), 4 + _hash);
+ fill(AccessorType(_biases), 5 + _hash);
+ }
if(_mixed_layout)
{
@@ -153,9 +226,20 @@ public:
SimpleTensor<TW> weights{ _weights_shape, _weights_data_type, 1, _weights_quantization_info };
SimpleTensor<TBias> biases{ _biases_shape, _bias_data_type, 1, _input_quantization_info };
- fill(src, 0);
- fill(weights, 1);
- fill(biases, 2);
+ fill(src, 0 + _hash);
+ fill(weights, 1 + _hash);
+ fill(biases, 2 + _hash);
+
+ if(_run_twice) {
+ SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info);
+ if(_act_info.enabled()) {
+ reference::activation_layer<T>(depth_out, _act_info);
+ }
+
+ fill(src, 3 + _hash);
+ fill(weights, 4 + _hash);
+ fill(biases, 5 + _hash);
+ }
SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info);
_reference = (_act_info.enabled()) ? reference::activation_layer<T>(depth_out, _act_info) : depth_out;
@@ -164,6 +248,7 @@ public:
protected:
void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst)
{
+ ARM_COMPUTE_ERROR_ON(_in_place);
// Test Multi DataLayout graph cases, when the data layout changes after configure
src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
dst.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
@@ -183,32 +268,77 @@ protected:
{
case DataType::QASYMM8:
{
- std::uniform_int_distribution<uint8_t> distribution(0, 10);
- library->fill(tensor, distribution, i);
+ if(_use_dynamic_output_quant)
+ {
+ std::uniform_int_distribution<int32_t> distribution(0, 255);
+ library->fill(tensor, distribution, i);
+ }
+ else
+ {
+ // Legacy initialization in case the output quantization info can't be reliably estimated
+ std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f);
+ std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second);
+ library->fill(tensor, distribution, i);
+ }
break;
}
case DataType::QASYMM8_SIGNED:
+ {
+ if(_use_dynamic_output_quant)
+ {
+ std::uniform_int_distribution<int32_t> distribution(-128, 127);
+ library->fill(tensor, distribution, i);
+ }
+ else
+ {
+ // Legacy initialization in case the output quantization info can't be reliably estimated
+ std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f);
+ std::uniform_int_distribution<int32_t> distribution(bounds.first, bounds.second);
+ library->fill(tensor, distribution, i);
+ }
+ break;
+ }
case DataType::QSYMM8_PER_CHANNEL:
{
- std::uniform_int_distribution<int8_t> distribution(-10, 10);
+ int min_bound = 128;
+ int max_bound = -127;
+ for(size_t i = 0; i < _weights_quantization_info.scale().size(); i++)
+ {
+ std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i);
+ if(bounds.first < min_bound)
+ {
+ min_bound = bounds.first;
+ }
+ if(bounds.second > max_bound)
+ {
+ max_bound = bounds.second;
+ }
+ }
+ std::uniform_int_distribution<int32_t> distribution(min_bound, max_bound);
library->fill(tensor, distribution, i);
break;
}
- case DataType::F16:
+ case DataType::S32:
{
- arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
+ std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias);
library->fill(tensor, distribution, i);
break;
}
- case DataType::F32:
+ case DataType::BFLOAT16:
{
- std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
+ arm_compute::utils::uniform_real_distribution_16bit<bfloat16> distribution{ -1.0f, 1.0f };
library->fill(tensor, distribution, i);
break;
}
- case DataType::S32:
+ case DataType::F16:
{
- std::uniform_int_distribution<int32_t> distribution(-100, 100);
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::F32:
+ {
+ std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
library->fill(tensor, distribution, i);
break;
}
@@ -241,19 +371,32 @@ protected:
unsigned int _depth_multiplier{};
Size2D _dilation{};
bool _mixed_layout{ false };
+ bool _in_place{ false };
+ bool _run_twice{ false };
+ bool _use_dynamic_output_quant{false};
+
+ int32_t _hash{0};
+ // Random initialization limits
+ // The default values are previously handcrafted limits
+ // that should be used when dynamic quantization is not in use
+ int32_t _min_bias{-100};
+ int32_t _max_bias{100};
+ int32_t _min_u8{0};
+ int32_t _max_u8{50};
+ int32_t _min_s8{-25};
+ int32_t _max_s8{25};
};
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false, bool run_twice = false>
class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
{
public:
- template <typename...>
void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType data_type, DataLayout data_layout,
ActivationLayerInfo act_info)
{
DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier,
data_type, data_type, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(),
- data_layout, act_info, mixed_layout);
+ data_layout, act_info, mixed_layout, in_place, run_twice);
}
};
@@ -261,7 +404,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DepthwiseConvolutionLayerNativeValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
{
public:
- template <typename...>
void setup(size_t width, size_t height, size_t channel, size_t batch, Size2D kernel_size, size_t depth_multiplier, Size2D dilation, Size2D stride, bool padding_valid, DataType data_type,
DataLayout data_layout)
{
@@ -276,7 +418,7 @@ public:
if(padding_valid)
{
- _conv_info = PadStrideInfo();
+ _conv_info = PadStrideInfo(stride.width, stride.height);
}
else
{
@@ -301,6 +443,9 @@ public:
_biases = create_tensor<TensorType>(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout);
_target = create_tensor<TensorType>(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout);
+ add_padding_x({ &_src, &_biases, &_target }, _data_layout);
+ add_padding_x({ &_weights }, _data_layout, true);
+
// Create Depthwise Convolution configure function
const ConvolutionInfo info
{
@@ -316,8 +461,6 @@ public:
void allocate_and_run_target()
{
- add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout);
-
// Allocate tensors
_src.allocator()->allocate();
_weights.allocator()->allocate();
@@ -394,20 +537,21 @@ protected:
unsigned int _depth_multiplier{};
};
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool in_place = false>
class DepthwiseConvolutionLayerNativeConfigurableValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
{
public:
- template <typename...>
void setup(size_t width, size_t height, size_t channel, size_t batch, Size2D kernel_size, size_t depth_multiplier, Size2D dilation, Size2D stride, bool padding_valid, DataType data_type,
- DataLayout data_layout, const ActivationLayerInfo &act_info, unsigned int n0)
+ DataLayout data_layout, const ActivationLayerInfo &act_info, unsigned int n0, bool export_to_cl_image)
{
- _dilation = dilation;
- _depth_multiplier = depth_multiplier;
- _data_type = data_type;
- _data_layout = data_layout;
- _act_info = act_info;
- _n0 = n0;
+ _dilation = dilation;
+ _depth_multiplier = depth_multiplier;
+ _data_type = data_type;
+ _data_layout = data_layout;
+ _act_info = act_info;
+ _n0 = n0;
+ _export_to_cl_image = export_to_cl_image;
+ _in_place = in_place;
_input_shape = TensorShape(width, height, channel, batch);
_weights_shape = TensorShape(kernel_size.width, kernel_size.height, channel * _depth_multiplier);
@@ -415,16 +559,29 @@ public:
if(padding_valid)
{
- _conv_info = PadStrideInfo();
+ _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), DataLayout::NCHW, _dilation);
}
else
{
- _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), DataLayout::NCHW, _dilation);
+ _conv_info = PadStrideInfo(stride.width, stride.height);
}
}
void configure_target()
{
+#if defined(ARM_COMPUTE_OPENCL_ENABLED)
+ if(_export_to_cl_image)
+ {
+ _validate_output &= image2d_from_buffer_supported(CLKernelLibrary::get().get_device());
+ _validate_output &= (get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) != 0);
+ }
+#endif // ARM_COMPUTE_OPENCL_ENABLED
+
+ if(!_validate_output)
+ {
+ return;
+ }
+
TensorShape input_shape = _input_shape;
TensorShape weights_shape = _weights_shape;
@@ -435,19 +592,32 @@ public:
}
// Create tensors
- _src = create_tensor<TensorType>(input_shape, _data_type, 1, QuantizationInfo(), _data_layout);
- _weights = create_tensor<TensorType>(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout);
- _biases = create_tensor<TensorType>(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout);
- _target = create_tensor<TensorType>(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout);
+ _src = create_tensor<TensorType>(input_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+ _weights = create_tensor<TensorType>(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+ _biases = create_tensor<TensorType>(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+ TensorType *target_to_use = nullptr;
+ if(!_in_place)
+ {
+ _target = create_tensor<TensorType>(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout);
+ target_to_use = &_target;
+ }
+
+ DWCComputeKernelInfo dwc_info;
+ dwc_info.n0 = _n0;
+ dwc_info.m0 = _conv_info.stride().first == 1 && _dilation.x() == 1 ? 8 : 1;
+ dwc_info.export_input_to_cl_image = false;
+ dwc_info.export_weights_to_cl_image = _export_to_cl_image;
- DWCWeightsKernelInfo dwc_weights_info;
- dwc_weights_info.n0 = _n0;
+ const ConvolutionInfo conv_kernel_info
+ {
+ _conv_info, _depth_multiplier, _act_info, _dilation
+ };
- DWCKernelInfo dwc_info;
- dwc_info.activation_info = _act_info;
+ add_padding_x({ &_src, &_biases, &_target }, _data_layout);
+ add_padding_x({ &_weights }, _data_layout, _export_to_cl_image); // Don't add left padding if cl image will be used
// Create Depthwise Convolution configure function
- _dwc.configure(&_src, &_weights, &_biases, &_target, dwc_weights_info, dwc_info, _conv_info, _depth_multiplier, _dilation);
+ _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_info, conv_kernel_info);
ARM_COMPUTE_ASSERT(_src.info()->is_resizable());
ARM_COMPUTE_ASSERT(_weights.info()->is_resizable());
@@ -457,18 +627,24 @@ public:
void allocate_and_run_target()
{
- add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout);
+ if(!_validate_output)
+ {
+ return;
+ }
// Allocate tensors
_src.allocator()->allocate();
_weights.allocator()->allocate();
_biases.allocator()->allocate();
- _target.allocator()->allocate();
ARM_COMPUTE_ASSERT(!_src.info()->is_resizable());
ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable());
ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable());
- ARM_COMPUTE_ASSERT(!_target.info()->is_resizable());
+ if(!_in_place)
+ {
+ _target.allocator()->allocate();
+ ARM_COMPUTE_ASSERT(!_target.info()->is_resizable());
+ }
// Fill tensors
fill(AccessorType(_src), 0);
@@ -477,17 +653,28 @@ public:
// Test Multi DataLayout graph cases, when the data layout changes after configure
_src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
- _target.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
+ if(!_in_place)
+ {
+ _target.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
+ }
// Compute function
_dwc.run();
// Reinstating original data layout for the test suite to properly check the values
- _target.info()->set_data_layout(_data_layout);
+ if(!_in_place)
+ {
+ _target.info()->set_data_layout(_data_layout);
+ }
}
void compute_reference()
{
+ if(!_validate_output)
+ {
+ return;
+ }
+
SimpleTensor<T> src{ _input_shape, _data_type };
SimpleTensor<T> weights{ _weights_shape, _data_type };
SimpleTensor<T> biases{ _biases_shape, _data_type };
@@ -542,27 +729,28 @@ protected:
Size2D _dilation{};
unsigned int _depth_multiplier{};
unsigned int _n0{};
+ bool _export_to_cl_image{};
+ bool _validate_output{ true };
+ bool _in_place{ false };
};
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false>
class DepthwiseConvolutionLayerValidationQuantizedFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
{
public:
- template <typename...>
void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType data_type,
QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, DataLayout data_layout, ActivationLayerInfo act_info)
{
DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type,
data_type, input_quantization_info, input_quantization_info, output_quantization_info,
- data_layout, act_info, mixed_layout);
+ data_layout, act_info, mixed_layout, in_place);
}
};
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW, bool in_place = false>
class DepthwiseConvolutionLayerValidationQuantizedPerChannelFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW>
{
public:
- template <typename...>
void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType input_data_type, DataType weights_data_type,
QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, DataLayout data_layout, ActivationLayerInfo act_info)
{
@@ -580,10 +768,10 @@ public:
DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier,
input_data_type, weights_data_type,
input_quantization_info, QuantizationInfo(weights_scales), output_quantization_info,
- data_layout, act_info);
+ data_layout, act_info, false, in_place);
}
};
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DEPTHWISECONVOLUTIONLAYERFIXTURE_H
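Note on the dynamic-quantization changes above: the depthwise fixture now draws the input and weight scales as powers of two from [2^-5, 2^3] and offsets every fill seed by a hash of the tensor geometry, so configurations that previously shared identical random data now diverge. Below is a minimal standalone sketch of that seeding idea; the shape_hash helper and the fixed base seed are illustrative assumptions, not the library's own helpers.

#include <cmath>
#include <cstdint>
#include <iostream>
#include <random>

// Hypothetical stand-in for the fixture's _hash member: a plain sum of the
// shape dimensions, so different test configurations perturb the fill seeds.
int32_t shape_hash(int w, int h, int c, int n)
{
    return w + h + c + n;
}

int main()
{
    const int32_t hash = shape_hash(7, 7, 16, 2);

    std::mt19937 generator(1234u + hash); // plays the role of library->seed() + _hash
    std::uniform_real_distribution<float> exponent(-5.0f, 3.0f);

    // Scales are powers of two in [2^-5, 2^3], one for the input and one for the weights.
    const float input_scale   = std::pow(2.0f, exponent(generator));
    const float weights_scale = std::pow(2.0f, exponent(generator));

    // Fill seeds are plain indices offset by the hash, mirroring fill(tensor, 0 + _hash).
    std::cout << "input scale: " << input_scale
              << ", weights scale: " << weights_scale
              << ", src seed: " << 0 + hash
              << ", weights seed: " << 1 + hash << '\n';
    return 0;
}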
diff --git a/tests/validation/fixtures/DequantizationLayerFixture.h b/tests/validation/fixtures/DequantizationLayerFixture.h
index af998bb740..4eb25a5bc5 100644
--- a/tests/validation/fixtures/DequantizationLayerFixture.h
+++ b/tests/validation/fixtures/DequantizationLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,7 +47,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DequantizationValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType src_data_type, DataType dst_datatype, DataLayout data_layout)
{
_quantization_info = generate_quantization_info(src_data_type, shape.z());
diff --git a/tests/validation/fixtures/DirectConvolution3DFixture.h b/tests/validation/fixtures/DirectConvolution3DFixture.h
new file mode 100644
index 0000000000..e80ad2f54f
--- /dev/null
+++ b/tests/validation/fixtures/DirectConvolution3DFixture.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2021, 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DIRECTCONVOLUTION3DFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DIRECTCONVOLUTION3DFIXTURE_H
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "tests/framework/Asserts.h" // Required for ARM_COMPUTE_ASSERT
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/Conv3D.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using namespace arm_compute::misc::shape_calculator;
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolution3DValidationGenericFixture : public framework::Fixture
+{
+public:
+ using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type;
+
+ void setup(const TensorShape &input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth,
+ unsigned int num_kernels, bool has_bias, const ActivationLayerInfo &act_info, const DataType &data_type, const DataLayout &data_layout,
+ const QuantizationInfo &src_qinfo = QuantizationInfo(), const QuantizationInfo &weights_qinfo = QuantizationInfo(), const QuantizationInfo &dst_qinfo = QuantizationInfo())
+ {
+ ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NDHWC);
+
+ const TensorShape weights_shape(num_kernels, input_shape[0], kernel_width, kernel_height, kernel_depth);
+ const TensorShape bias_shape(num_kernels);
+ const DataType bias_data_type = is_data_type_quantized(data_type) ? DataType::S32 : data_type;
+ const Conv3dInfo conv3d_info(Size3D(stride_x, stride_y, stride_z), Padding3D(pad_x, pad_y, pad_z), act_info, Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false);
+ const TensorShape output_shape = compute_conv3d_shape(input_shape, weights_shape, conv3d_info);
+
+ _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, conv3d_info, has_bias, data_type, bias_data_type, data_layout, src_qinfo, weights_qinfo, dst_qinfo);
+ _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, conv3d_info, has_bias, data_type, bias_data_type, src_qinfo, weights_qinfo, dst_qinfo);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ switch(tensor.data_type())
+ {
+ case DataType::F16:
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::F32:
+ {
+ std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ default:
+ library->fill_tensor_uniform(tensor, i);
+ }
+ }
+
+ TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const Conv3dInfo &conv3d_info,
+ bool has_bias, const DataType &data_type, const DataType &bias_data_type, const DataLayout &data_layout, const QuantizationInfo &src_qinfo,
+ const QuantizationInfo &weights_qinfo, const QuantizationInfo &dst_qinfo)
+ {
+ // Create tensors
+ TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, src_qinfo, data_layout);
+ TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, weights_qinfo, data_layout);
+ TensorType bias = has_bias ? create_tensor<TensorType>(bias_shape, bias_data_type, 1, QuantizationInfo()) : TensorType();
+ TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, dst_qinfo, data_layout);
+
+ // Create and configure function
+ FunctionType conv{};
+ conv.configure(&src, &weights, has_bias ? &bias : nullptr, &dst, conv3d_info);
+
+ ARM_COMPUTE_ASSERT(src.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(weights.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ weights.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!src.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!weights.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+
+ // Fill tensors
+ fill(AccessorType(src), 0);
+ fill(AccessorType(weights), 1);
+
+ if(has_bias)
+ {
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+ bias.allocator()->allocate();
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ fill(AccessorType(bias), 2);
+ }
+
+ // Compute Direct Convolution 3D function
+ conv.run();
+
+ return dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape,
+ const Conv3dInfo &conv3d_info, bool has_bias, const DataType &data_type, const DataType &bias_data_type, const QuantizationInfo &src_qinfo,
+ const QuantizationInfo &weights_qinfo, const QuantizationInfo &dst_qinfo)
+ {
+ // Create reference
+ SimpleTensor<T> src{ input_shape, data_type, 1, src_qinfo };
+ SimpleTensor<T> weights{ weights_shape, data_type, 1, weights_qinfo };
+ SimpleTensor<TBias> bias{ bias_shape, bias_data_type };
+ SimpleTensor<T> dst{ output_shape, data_type, 1, dst_qinfo };
+
+ // Fill reference
+ fill(src, 0);
+ fill(weights, 1);
+
+ if(has_bias)
+ {
+ fill(bias, 2);
+ }
+
+ return reference::activation_layer(reference::conv3d<T, TBias>(src, weights, bias, dst, conv3d_info), conv3d_info.act_info);
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolution3DValidationFixture : public DirectConvolution3DValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth,
+ unsigned int num_kernels, bool has_bias, ActivationLayerInfo act_info, DataType data_type, DataLayout data_layout)
+ {
+ DirectConvolution3DValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, stride_z, pad_x, pad_y, pad_z, kernel_width, kernel_height,
+ kernel_depth, num_kernels, has_bias, act_info, data_type, data_layout);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolution3DValidationQuantizedFixture : public DirectConvolution3DValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth,
+ unsigned int num_kernels, bool has_bias, ActivationLayerInfo act_info, DataType data_type, DataLayout data_layout, QuantizationInfo src_qinfo, QuantizationInfo weights_qinfo,
+ QuantizationInfo dst_qinfo)
+ {
+ DirectConvolution3DValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, stride_z, pad_x, pad_y, pad_z, kernel_width, kernel_height,
+ kernel_depth, num_kernels, has_bias, act_info, data_type, data_layout, src_qinfo,
+ weights_qinfo, dst_qinfo);
+ }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DIRECTCONVOLUTION3DFIXTURE_H
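For context, the new fixture obtains the destination extent from compute_conv3d_shape() before creating the output tensor. Under the Conv3dInfo built above (dilation 1x1x1, FLOOR rounding) and assuming the given padding is applied symmetrically on both sides of each spatial dimension, every output extent follows the usual convolution formula; a minimal sketch:

#include <cstdio>

// Output extent of one spatial dimension of a convolution, assuming symmetric
// padding and floor rounding (the fixture's Conv3dInfo uses FLOOR and dilation 1).
int conv_out_dim(int in, int kernel, int stride, int pad, int dilation = 1)
{
    const int effective_kernel = dilation * (kernel - 1) + 1;
    return (in + 2 * pad - effective_kernel) / stride + 1; // integer division == floor here
}

int main()
{
    // Example: a 16x16x8 input volume, 3x3x3 kernel, stride 2, padding 1 everywhere.
    const int out_w = conv_out_dim(16, 3, 2, 1);
    const int out_h = conv_out_dim(16, 3, 2, 1);
    const int out_d = conv_out_dim(8, 3, 2, 1);
    std::printf("output extents: %d x %d x %d\n", out_w, out_h, out_d); // 8 x 8 x 4
    return 0;
}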
diff --git a/tests/validation/fixtures/DirectConvolutionLayerFixture.h b/tests/validation/fixtures/DirectConvolutionLayerFixture.h
index 614aa20753..6f204642ca 100644
--- a/tests/validation/fixtures/DirectConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DirectConvolutionLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,6 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DIRECTCONVOLUTIONLAYERFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DIRECTCONVOLUTIONLAYERFIXTURE_H
+
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
@@ -51,11 +55,52 @@ class DirectConvolutionValidationGenericFixture : public framework::Fixture
public:
using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type;
- template <typename...>
+ void setup_quantization(const TensorShape &input_shape, const TensorShape &weights_shape, QuantizationInfo &input_q_info,
+ QuantizationInfo &weights_q_info, DataType data_type)
+ {
+ const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+ const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+ std::mt19937 generator(library->seed() + _hash);
+ std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f);
+ std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+ const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+
+ const int32_t offset_lhs = distribution_t(generator);
+ const int32_t offset_rhs = distribution_t(generator);
+
+ input_q_info = QuantizationInfo(scale_lhs, offset_lhs);
+ weights_q_info = QuantizationInfo(scale_rhs, offset_rhs);
+
+ QuantizationHint q_hint = suggest_conv_dst_q_info_and_bias(input_q_info, weights_q_info,
+ weights_shape.y() /* height */, weights_shape.x() /* width */, input_shape.z() /* channels */,
+ data_type, 0.5f /* bias_fraction */);
+
+ _dst_q_info = q_hint.q_info;
+ _min_bias = q_hint.bias_min;
+ _max_bias = q_hint.bias_max;
+
+ // Do not change these values here: they are the natural limits of the associated data types and
+ // are embedded in the computation of the dst quantization info (an illustrative sketch follows this file's diff).
+ _min_u8 = 0;
+ _max_u8 = 255;
+ _min_s8 = -128;
+ _max_s8 = 127;
+ }
+
void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels,
DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataLayout data_layout, bool mixed_layout = false)
{
- _quantization_info = quantization_info;
+ // This hash is used to seed the random generators. Hash collisions may occur, but
+ // this is intentional, as it is a very easy way to make the random generation
+ // process differ across most test configurations, which previously all used
+ // the same set of values.
+ _hash = input_shape[0] + input_shape[1] + input_shape[2] + input_shape[3] +
+ stride_x + stride_y + pad_x + pad_y + kernel_size + num_kernels + mixed_layout
+ + (data_layout == DataLayout::NHWC);
+
_data_type = data_type;
_mixed_layout = mixed_layout;
@@ -69,24 +114,48 @@ public:
const TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, info);
- _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info, act_info, data_layout);
- _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info, act_info);
+ QuantizationInfo input_q_info = quantization_info;
+ QuantizationInfo weights_q_info = quantization_info;
+ _dst_q_info = quantization_info;
+
+ if(is_data_type_quantized(data_type) && (!act_info.enabled() || act_info.activation() == ActivationFunction::IDENTITY))
+ {
+ setup_quantization(input_shape, weights_shape, input_q_info, weights_q_info, data_type);
+ }
+
+ _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, input_q_info, weights_q_info, act_info, data_layout);
+ _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, input_q_info, weights_q_info, act_info);
}
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation,
DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataLayout data_layout)
{
ARM_COMPUTE_ERROR_ON(data_layout == DataLayout::UNKNOWN);
ARM_COMPUTE_UNUSED(dilation);
- _quantization_info = quantization_info;
+ // This hash is used to seed the random generators. Hash collisions may occur, but
+ // this is intentional, as it is a very easy way to make the random generation
+ // process differ across most test configurations, which previously all used
+ // the same set of values.
+ _hash = input_shape[0] + input_shape[1] + input_shape[2] + input_shape[3] +
+ weights_shape[0] + weights_shape[1] + weights_shape[2] + weights_shape[3] + dilation.x() +
+ dilation.y() + info.pad_bottom() + info.pad_left() + info.pad_right() + info.pad_top();
+
_data_type = data_type;
const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
- _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info, act_info, data_layout);
- _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info, act_info);
+ QuantizationInfo input_q_info = quantization_info;
+ QuantizationInfo weights_q_info = quantization_info;
+ _dst_q_info = quantization_info;
+
+ if(is_data_type_quantized(data_type) && (!act_info.enabled() || act_info.activation() == ActivationFunction::IDENTITY))
+ {
+ setup_quantization(input_shape, weights_shape, input_q_info, weights_q_info, data_type);
+ }
+
+ _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, input_q_info, weights_q_info, act_info, data_layout);
+ _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, input_q_info, weights_q_info, act_info);
}
protected:
@@ -112,14 +181,14 @@ protected:
{
case DataType::QASYMM8:
{
- std::uniform_int_distribution<uint8_t> distribution(0, 50);
+ std::uniform_int_distribution<uint32_t> distribution(_min_u8, _max_u8);
library->fill(tensor, distribution, i);
break;
}
case DataType::QASYMM8_SIGNED:
{
// Use small input range to avoid all the test results being saturated at the end.
- std::uniform_int_distribution<int8_t> distribution(-25, 25);
+ std::uniform_int_distribution<int32_t> distribution(_min_s8, _max_s8);
library->fill(tensor, distribution, i);
break;
}
@@ -137,7 +206,7 @@ protected:
}
case DataType::S32:
{
- std::uniform_int_distribution<int32_t> distribution(-5, 5);
+ std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias);
library->fill(tensor, distribution, i);
break;
}
@@ -147,7 +216,7 @@ protected:
}
TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const PadStrideInfo &info,
- DataType data_type, DataType bias_data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info, const DataLayout &data_layout)
+ DataType data_type, DataType bias_data_type, QuantizationInfo input_q_info, QuantizationInfo weights_q_info, ActivationLayerInfo act_info, const DataLayout &data_layout)
{
if(data_layout == DataLayout::NHWC)
{
@@ -157,10 +226,10 @@ protected:
}
// Create tensors
- TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, quantization_info, data_layout);
- TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, quantization_info, data_layout);
- TensorType bias = create_tensor<TensorType>(bias_shape, bias_data_type, 1, quantization_info);
- TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, quantization_info, data_layout);
+ TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, input_q_info, data_layout);
+ TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, weights_q_info, data_layout);
+ TensorType bias = create_tensor<TensorType>(bias_shape, bias_data_type, 1, QuantizationInfo());
+ TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, _dst_q_info, data_layout);
add_padding_x({ &src, &bias, &dst }, data_layout);
add_padding_x({ &weights }, data_layout, input_shape[0] % 4 == 0); // Don't add left padding if cl image will be used
@@ -186,9 +255,9 @@ protected:
ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
- fill(AccessorType(src), 0);
- fill(AccessorType(weights), 1);
- fill(AccessorType(bias), 2);
+ fill(AccessorType(src), 0 + _hash);
+ fill(AccessorType(weights), 1 + _hash);
+ fill(AccessorType(bias), 2 + _hash);
if(_mixed_layout)
{
@@ -204,33 +273,45 @@ protected:
}
SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
- DataType data_type, DataType bias_data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info)
+ DataType data_type, DataType bias_data_type, QuantizationInfo input_q_info, QuantizationInfo weights_q_info, ActivationLayerInfo act_info)
{
// Create reference
- SimpleTensor<T> src{ input_shape, data_type, 1, quantization_info };
- SimpleTensor<T> weights{ weights_shape, data_type, 1, quantization_info };
- SimpleTensor<TBias> bias{ bias_shape, bias_data_type, 1, quantization_info };
+ SimpleTensor<T> src{ input_shape, data_type, 1, input_q_info };
+ SimpleTensor<T> weights{ weights_shape, data_type, 1, weights_q_info };
+ SimpleTensor<TBias> bias{ bias_shape, bias_data_type, 1, QuantizationInfo() };
// Fill reference
- fill(src, 0);
- fill(weights, 1);
- fill(bias, 2);
-
- SimpleTensor<T> dst = reference::convolution_layer<T>(src, weights, bias, output_shape, info);
- return (act_info.enabled()) ? reference::activation_layer<T>(dst, act_info) : dst;
+ fill(src, 0 + _hash);
+ fill(weights, 1 + _hash);
+ fill(bias, 2 + _hash);
+
+ SimpleTensor<T> dst = reference::convolution_layer<T>(src, weights, bias, output_shape, info,
+ Size2D(1U, 1U) /* dilation */, 1 /* num_groups */, _dst_q_info);
+ SimpleTensor<T> dst2 = (act_info.enabled()) ? reference::activation_layer<T>(dst, act_info) : dst;
+ return dst2;
}
TensorType _target{};
SimpleTensor<T> _reference{};
- QuantizationInfo _quantization_info{};
+ QuantizationInfo _dst_q_info{};
DataType _data_type{};
bool _mixed_layout{ false };
+ int32_t _hash{0};
+
+ // Random initialization limits
+ // The default values are previously handcrafted limits
+ // that should be used when dynamic quantization is not in use
+ int32_t _min_bias{-5};
+ int32_t _max_bias{5};
+ int32_t _min_u8{0};
+ int32_t _max_u8{50};
+ int32_t _min_s8{-25};
+ int32_t _max_s8{25};
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
class DirectConvolutionValidationFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, ActivationLayerInfo act_info,
DataLayout data_layout)
{
@@ -243,7 +324,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DirectConvolutionValidationQuantizedFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, QuantizationInfo quantization_info,
ActivationLayerInfo act_info, DataLayout data_layout)
{
@@ -256,7 +336,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DirectConvolutionValidationWithTensorShapesQuantizedFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation,
DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataLayout data_layout)
{
@@ -269,7 +348,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DirectConvolutionValidationWithTensorShapesFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation,
DataType data_type, ActivationLayerInfo act_info)
{
@@ -281,3 +359,5 @@ public:
} // namespace validation
} // namespace test
} // namespace arm_compute
+
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DIRECTCONVOLUTIONLAYERFIXTURE_H
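About the suggest_conv_dst_q_info_and_bias() calls introduced above: they return a destination QuantizationInfo together with a bias range that keeps the int32 accumulator representable. The actual heuristic lives in the test utilities and is not shown in this diff; the sketch below only illustrates the general idea of deriving a destination scale from a worst-case accumulator, and its accumulator bound and output range are assumptions made for demonstration, not the library's algorithm.

#include <cstdint>
#include <cstdio>

// Purely illustrative: estimate a destination scale for a quantized convolution
// so that the worst-case int32 accumulator maps into an 8-bit output range.
// This is NOT suggest_conv_dst_q_info_and_bias(), just the underlying intuition.
float naive_dst_scale(float src_scale, float weights_scale,
                      int kernel_w, int kernel_h, int channels,
                      int32_t max_abs_input = 255, int32_t max_abs_weight = 255)
{
    // One output element accumulates kernel_w * kernel_h * channels products.
    const double accumulations  = static_cast<double>(kernel_w) * kernel_h * channels;
    const double worst_case_acc = accumulations * max_abs_input * max_abs_weight;

    // The accumulator lives in the (src_scale * weights_scale) domain; spread it
    // over the 8-bit output range [0, 255].
    return static_cast<float>(src_scale * weights_scale * worst_case_acc / 255.0);
}

int main()
{
    const float dst_scale = naive_dst_scale(0.5f, 0.25f, 3, 3, 16);
    std::printf("illustrative dst scale: %g\n", dst_scale);
    return 0;
}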
diff --git a/tests/validation/fixtures/DropoutLayerFixture.h b/tests/validation/fixtures/DropoutLayerFixture.h
index 2a91911410..a84f2a6407 100644
--- a/tests/validation/fixtures/DropoutLayerFixture.h
+++ b/tests/validation/fixtures/DropoutLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DropoutLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, float ratio, bool forward, DataType data_type)
{
_target = compute_target(shape, ratio, forward, data_type);
diff --git a/tests/validation/fixtures/ElementwiseOperationsFixture.h b/tests/validation/fixtures/ElementwiseOperationsFixture.h
index 352720c03b..f36a1f75b7 100644
--- a/tests/validation/fixtures/ElementwiseOperationsFixture.h
+++ b/tests/validation/fixtures/ElementwiseOperationsFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,11 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_ELEMENTWISE_OPERATIONS_FIXTURE
-#define ARM_COMPUTE_TEST_ELEMENTWISE_OPERATIONS_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_ELEMENTWISEOPERATIONSFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_ELEMENTWISEOPERATIONSFIXTURE_H
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
#include "tests/AssetsLibrary.h"
#include "tests/Globals.h"
#include "tests/IAccessor.h"
@@ -45,13 +46,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticOperationsGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1,
DataType data_type0, DataType data_type1, DataType output_data_type,
- QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool use_dyanmic_shape = false)
+ QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace = false, bool use_dynamic_shape = false)
{
_op = op;
- _use_dynamic_shape = use_dyanmic_shape;
+ _use_dynamic_shape = use_dynamic_shape;
+ _is_inplace = is_inplace;
_target = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out);
_reference = compute_reference(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out);
@@ -85,9 +86,29 @@ protected:
QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
{
// Create tensors
- TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0);
- TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1);
- TensorType dst = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, qinfo_out);
+ const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+ TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0);
+ TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1);
+ TensorType dst = create_tensor<TensorType>(out_shape, output_data_type, 1, qinfo_out);
+
+ // Check whether to do in-place computation and whether the inputs are broadcast compatible (a standalone sketch of this rule follows this hunk)
+ TensorType *actual_dst = &dst;
+ if(_is_inplace)
+ {
+ bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (data_type0 == output_data_type);
+ bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (data_type1 == output_data_type);
+ bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace);
+ ARM_COMPUTE_ASSERT(do_in_place);
+
+ if(src1_is_inplace)
+ {
+ actual_dst = &ref_src1;
+ }
+ else
+ {
+ actual_dst = &ref_src2;
+ }
+ }
// if _use_dynamic_shape is true, this fixture will test scenario for dynamic shapes.
// - At configure time, all input tensors are marked as dynamic using set_tensor_dynamic()
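The in-place branch added above reuses one of the inputs as the destination only when that input already matches the broadcast output shape, quantization info and data type, preferring the first input. A small standalone sketch of that selection rule follows; the TensorMeta struct is a simplified stand-in for the real tensor metadata.

#include <cassert>
#include <cstdio>
#include <string>

// Simplified stand-in for the metadata that decides whether an elementwise
// operation can run in place on one of its inputs.
struct TensorMeta
{
    int         width;     // a single dimension for brevity
    float       scale;     // quantization scale
    std::string data_type;
};

bool can_reuse(const TensorMeta &input, const TensorMeta &output)
{
    // The input can hold the result only if shape, quantization and type all match.
    return input.width == output.width &&
           input.scale == output.scale &&
           input.data_type == output.data_type;
}

int main()
{
    const TensorMeta src1{16, 0.5f, "QASYMM8"};
    const TensorMeta src2{1, 0.5f, "QASYMM8"}; // broadcast along the width
    const TensorMeta dst{16, 0.5f, "QASYMM8"};

    // Mirror the fixture's choice: prefer src1, fall back to src2, otherwise fail.
    const TensorMeta *actual_dst = &dst;
    if(can_reuse(src1, dst))
    {
        actual_dst = &src1;
    }
    else if(can_reuse(src2, dst))
    {
        actual_dst = &src2;
    }
    assert(actual_dst != &dst && "in-place requested but no input is compatible");

    std::printf("writing results into %s\n", actual_dst == &src1 ? "src1" : "src2");
    return 0;
}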
@@ -101,7 +122,7 @@ protected:
// Create and configure function
FunctionType elem_op;
- elem_op.configure(&ref_src1, &ref_src2, &dst);
+ elem_op.configure(&ref_src1, &ref_src2, actual_dst);
if(_use_dynamic_shape)
{
@@ -111,16 +132,21 @@ protected:
ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable());
ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable());
- ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
// Allocate tensors
ref_src1.allocator()->allocate();
ref_src2.allocator()->allocate();
- dst.allocator()->allocate();
+
+ // If not computing in place, we still need to allocate the original dst
+ if(!_is_inplace)
+ {
+ ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+ dst.allocator()->allocate();
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+ }
ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable());
ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable());
- ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(ref_src1), 0);
@@ -129,7 +155,7 @@ protected:
// Compute function
elem_op.run();
- return dst;
+ return std::move(*actual_dst);
}
SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1,
@@ -152,6 +178,7 @@ protected:
SimpleTensor<T> _reference{};
ArithmeticOperation _op{ ArithmeticOperation::ADD };
bool _use_dynamic_shape{ false };
+ bool _is_inplace{ false };
};
// Arithmetic operation fused with activation function
@@ -159,15 +186,15 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticOperationsFuseActivationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1,
DataType data_type0, DataType data_type1, DataType output_data_type,
- QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info)
+ QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool is_inplace = true)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1,
data_type0, data_type1, output_data_type,
- qinfo0, qinfo1, qinfo_out);
- _act_info = act_info;
+ qinfo0, qinfo1, qinfo_out, is_inplace);
+ _act_info = act_info;
+ _is_inplace = is_inplace;
}
protected:
@@ -175,26 +202,51 @@ protected:
QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
{
// Create tensors
- TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0);
- TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1);
- TensorType dst = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, qinfo_out);
+ const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+ TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0);
+ TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1);
+ TensorType dst = create_tensor<TensorType>(out_shape, output_data_type, 1, qinfo_out);
+
+ // Check whether to do in-place computation and whether the inputs are broadcast compatible
+ TensorType *actual_dst = &dst;
+ if(_is_inplace)
+ {
+ bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (data_type0 == output_data_type);
+ bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (data_type1 == output_data_type);
+ bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace);
+ ARM_COMPUTE_ASSERT(do_in_place);
+
+ if(src1_is_inplace)
+ {
+ actual_dst = &ref_src1;
+ }
+ else
+ {
+ actual_dst = &ref_src2;
+ }
+ }
// Create and configure function
FunctionType elem_op;
- elem_op.configure(&ref_src1, &ref_src2, &dst, _act_info);
+ elem_op.configure(&ref_src1, &ref_src2, actual_dst, _act_info);
ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable());
ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable());
- ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
// Allocate tensors
ref_src1.allocator()->allocate();
ref_src2.allocator()->allocate();
- dst.allocator()->allocate();
+
+ // If not computing in place, we still need to allocate the original dst
+ if(!_is_inplace)
+ {
+ ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+ dst.allocator()->allocate();
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+ }
ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable());
ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable());
- ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(ref_src1), 0);
@@ -203,7 +255,7 @@ protected:
// Compute function
elem_op.run();
- return dst;
+ return std::move(*actual_dst);
}
SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1,
@@ -216,18 +268,18 @@ protected:
}
ActivationLayerInfo _act_info{};
+ bool _is_inplace{ false };
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class ArithmeticDivisionBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape0, shape1,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
}
};
@@ -235,12 +287,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticDivisionValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+ void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
}
};
@@ -248,12 +299,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticDivisionBroadcastDynamicShapeValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape0, shape1,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace, true /* use_dynamic_shape */);
}
};
@@ -261,12 +311,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticDivisionDynamicShapeValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+ void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace, true /* use_dynamic_shape */);
}
};
@@ -274,12 +323,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticDivisionBroadcastValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
{
ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape0, shape1,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -287,12 +335,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticDivisionValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
{
ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -300,12 +347,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticDivisionValidationIntegerFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
{
ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -313,14 +359,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ArithmeticDivisionValidationQuantizedFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type,
- QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+ QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape,
data_type0, data_type1, output_data_type,
- qinfo0, qinfo1, qinfo_out);
+ qinfo0, qinfo1, qinfo_out, is_inplace);
}
};
@@ -328,12 +373,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseMaxBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape0, shape1,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
}
};
@@ -341,12 +385,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseMaxValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+ void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape, shape,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
}
};
@@ -354,12 +397,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseMaxBroadcastValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
{
ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape0, shape1,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -367,12 +409,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseMaxValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
{
ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape, shape,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -380,14 +421,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseMaxValidationQuantizedFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type,
- QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+ QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape, shape,
data_type0, data_type1, output_data_type,
- qinfo0, qinfo1, qinfo_out);
+ qinfo0, qinfo1, qinfo_out, is_inplace);
}
};
@@ -395,14 +435,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseMaxQuantizedBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type,
- QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+ QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape0, shape1,
data_type0, data_type1, output_data_type,
- qinfo0, qinfo1, qinfo_out);
+ qinfo0, qinfo1, qinfo_out, is_inplace);
}
};
@@ -410,12 +449,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseMinBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape0, shape1,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
}
};
@@ -423,12 +461,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseMinValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+ void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape, shape,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
}
};
@@ -436,12 +473,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseMinBroadcastValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
{
ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape0, shape1,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -449,12 +485,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseMinValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
{
ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape, shape,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -462,14 +497,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseMinValidationQuantizedFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type,
- QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+ QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape, shape,
data_type0, data_type1, output_data_type,
- qinfo0, qinfo1, qinfo_out);
+ qinfo0, qinfo1, qinfo_out, is_inplace);
}
};
@@ -477,14 +511,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseMinQuantizedBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type,
- QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+ QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape0, shape1,
data_type0, data_type1, output_data_type,
- qinfo0, qinfo1, qinfo_out);
+ qinfo0, qinfo1, qinfo_out, is_inplace);
}
};
@@ -492,12 +525,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseSquaredDiffBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
}
};
@@ -505,12 +537,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseSquaredDiffValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+ void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape, shape,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
}
};
@@ -518,12 +549,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseSquaredDiffBroadcastValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
{
ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -531,12 +561,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseSquaredDiffValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
{
ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape, shape,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -544,14 +573,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseSquaredDiffValidationQuantizedFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type,
- QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+ QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape, shape,
data_type0, data_type1, output_data_type,
- qinfo0, qinfo1, qinfo_out);
+ qinfo0, qinfo1, qinfo_out, is_inplace);
}
};
@@ -559,14 +587,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwiseSquaredDiffQuantizedBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type,
- QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+ QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1,
data_type0, data_type1, output_data_type,
- qinfo0, qinfo1, qinfo_out);
+ qinfo0, qinfo1, qinfo_out, is_inplace);
}
};
@@ -574,7 +601,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PReluLayerBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::PRELU, shape0, shape1,
@@ -587,7 +613,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PReluLayerValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::PRELU, shape, shape,
@@ -600,7 +625,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PReluLayerValidationQuantizedFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type,
QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
@@ -615,7 +639,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PReluLayerQuantizedBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type,
QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
@@ -630,12 +653,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwisePowerBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::POWER, shape0, shape1,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
}
};
@@ -643,12 +665,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwisePowerValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+ void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
{
ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::POWER, shape, shape,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
}
};
@@ -656,12 +677,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwisePowerBroadcastValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
{
ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::POWER, shape0, shape1,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
@@ -669,16 +689,15 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementwisePowerValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+ void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
{
ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::POWER, shape, shape,
data_type0, data_type1, output_data_type,
- QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+ QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
}
};
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_ARITHMETIC_OPERATIONS_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_ELEMENTWISEOPERATIONSFIXTURE_H
diff --git a/tests/validation/fixtures/ElementwiseUnaryFixture.h b/tests/validation/fixtures/ElementwiseUnaryFixture.h
index 7221226fd1..15344288db 100644
--- a/tests/validation/fixtures/ElementwiseUnaryFixture.h
+++ b/tests/validation/fixtures/ElementwiseUnaryFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,8 +24,10 @@
#ifndef ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE
#define ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE
+#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
#include "tests/AssetsLibrary.h"
#include "tests/Globals.h"
#include "tests/IAccessor.h"
@@ -33,6 +35,11 @@
#include "tests/framework/Fixture.h"
#include "tests/validation/reference/ElementwiseUnary.h"
+#include <tuple>
+#include <limits>
+#include <type_traits>
+#include <vector>
+
namespace arm_compute
{
namespace test
@@ -43,12 +50,12 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ElementWiseUnaryValidationFixture : public framework::Fixture
{
public:
- template <typename...>
- void setup(TensorShape input_shape, DataType input_data_type, bool in_place, ElementWiseUnary op, bool use_dynamic_shape = false)
+ void setup(TensorShape input_shape, DataType input_data_type, bool in_place, ElementWiseUnary op,
+ bool use_dynamic_shape = false, QuantizationInfo qinfo = QuantizationInfo(), QuantizationInfo qinfo_out = QuantizationInfo())
{
_op = op;
- _target = compute_target(input_shape, input_data_type, in_place);
- _reference = compute_reference(input_shape, input_data_type);
+ _target = compute_target(input_shape, input_data_type, in_place, qinfo, qinfo_out);
+ _reference = compute_reference(input_shape, input_data_type, qinfo, qinfo_out);
_use_dynamic_shape = use_dynamic_shape;
}
@@ -63,60 +70,131 @@ protected:
{
case ElementWiseUnary::EXP:
{
- FloatDistributionType distribution{ FloatType(-1.0f), FloatType(1.0f) };
- library->fill(tensor, distribution, i);
+ switch(data_type)
+ {
+ case DataType::F32:
+ {
+ FloatDistributionType distribution{ FloatType(-86.63f), FloatType(88.36f) };
+ library->fill(tensor, distribution, i);
+ break;
+ }
+
+ case DataType::F16:
+ {
+ FloatDistributionType distribution{ FloatType(-9.00f), FloatType(10.73f) };
+ library->fill(tensor, distribution, i);
+ break;
+ }
+
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ library->fill_tensor_uniform(tensor, i);
+ break;
+
+ default:
+ ARM_COMPUTE_ERROR("Not implemented");
+ }
+
break;
}
case ElementWiseUnary::RSQRT:
+ case ElementWiseUnary::LOG:
{
- FloatDistributionType distribution{ FloatType(1.0f), FloatType(2.0f) };
- library->fill(tensor, distribution, i);
- break;
- }
- case ElementWiseUnary::ABS:
- case ElementWiseUnary::NEG:
- {
+ // For floating-point data types, the chosen input range contains only positive numbers
+ // (i.e. positive and negative zeros are excluded).
switch(data_type)
{
+ case DataType::F32:
+ {
+ FloatDistributionType distribution{ std::numeric_limits<float>::min(), std::numeric_limits<float>::max() };
+ library->fill(tensor, distribution, i);
+ break;
+ }
+
case DataType::F16:
{
- arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -2.0f, 2.0f };
+ FloatDistributionType distribution{ FloatType(0.00006103515625f), FloatType(65504.0f) };
library->fill(tensor, distribution, i);
break;
}
+
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ library->fill_tensor_uniform(tensor, i);
+ break;
+
+ default:
+ ARM_COMPUTE_ERROR("Not implemented");
+ }
+
+ break;
+ }
+ case ElementWiseUnary::SIN:
+ {
+ switch(data_type)
+ {
case DataType::F32:
+ case DataType::F16:
{
- FloatDistributionType distribution{ FloatType(-2.0f), FloatType(2.0f) };
+ FloatDistributionType distribution{ FloatType(-100.0f), FloatType(100.0f) };
library->fill(tensor, distribution, i);
break;
}
+
case DataType::S32:
{
- std::uniform_int_distribution<int32_t> distribution(-100, 100);
+ std::uniform_int_distribution<int32_t> distribution(std::numeric_limits<int32_t>::lowest(), std::numeric_limits<int32_t>::max());
library->fill(tensor, distribution, i);
break;
}
+
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ library->fill_tensor_uniform(tensor, i);
+ break;
+
default:
- ARM_COMPUTE_ERROR("DataType for Elementwise Negation Not implemented");
+ ARM_COMPUTE_ERROR("Not implemented");
}
+
break;
}
- case ElementWiseUnary::LOG:
- {
- FloatDistributionType distribution{ FloatType(0.0000001f), FloatType(100.0f) };
- library->fill(tensor, distribution, i);
- break;
- }
- case ElementWiseUnary::SIN:
- {
- FloatDistributionType distribution{ FloatType(-100.00f), FloatType(100.00f) };
- library->fill(tensor, distribution, i);
- break;
- }
+ case ElementWiseUnary::ABS:
+ case ElementWiseUnary::NEG:
case ElementWiseUnary::ROUND:
{
- FloatDistributionType distribution{ FloatType(100.0f), FloatType(-100.0f) };
- library->fill(tensor, distribution, i);
+ switch(data_type)
+ {
+ case DataType::F32:
+ {
+ FloatDistributionType distribution{ std::numeric_limits<float>::lowest() / 2, std::numeric_limits<float>::max() / 2 };
+ library->fill(tensor, distribution, i);
+ break;
+ }
+
+ case DataType::F16:
+ {
+ FloatDistributionType distribution{ FloatType(-65504.0f), FloatType(65504.0f) };
+ library->fill(tensor, distribution, i);
+ break;
+ }
+
+ case DataType::S32:
+ {
+ std::uniform_int_distribution<int32_t> distribution(std::numeric_limits<int32_t>::lowest(), std::numeric_limits<int32_t>::max());
+ library->fill(tensor, distribution, i);
+ break;
+ }
+
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ library->fill_tensor_uniform(tensor, i);
+ break;
+
+ default:
+ ARM_COMPUTE_ERROR("Not implemented");
+ }
+
break;
}
default:
@@ -124,12 +202,11 @@ protected:
}
}
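The widened EXP fill ranges hug the representable limits of each type: e^88.36 is just below FLT_MAX and e^-86.63 just above the smallest normal F32 value, while the F16 bounds play the same role against 65504 and 2^-14. A quick standalone check of the F32 bounds (plain <cmath>/<limits>, independent of the fixture):

#include <cmath>
#include <cstdio>
#include <limits>

int main()
{
    // Upper bound: e^88.36 is roughly 2.4e38, below FLT_MAX (~3.4e38), so exp() does not overflow to inf.
    // Lower bound: e^-86.63 is roughly 2.4e-38, above FLT_MIN (~1.2e-38), so exp() stays in the normal range.
    std::printf("exp(+88.36f) = %e  (FLT_MAX = %e)\n", std::exp(88.36f), std::numeric_limits<float>::max());
    std::printf("exp(-86.63f) = %e  (FLT_MIN = %e)\n", std::exp(-86.63f), std::numeric_limits<float>::min());
    return 0;
}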
- TensorType compute_target(const TensorShape &shape, DataType data_type, bool in_place)
+ TensorType compute_target(const TensorShape &shape, DataType data_type, bool in_place, QuantizationInfo qinfo, QuantizationInfo qinfo_out)
{
// Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
+ TensorType src = create_tensor<TensorType>(shape, data_type, 1, qinfo);
+ TensorType dst = create_tensor<TensorType>(shape, data_type, 1, qinfo_out);
TensorType *actual_dst = in_place ? &src : &dst;
// if _use_dynamic_shape is true, this fixture will test scenario for dynamic shapes.
@@ -176,28 +253,39 @@ protected:
}
}
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type)
+ SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, QuantizationInfo qinfo, QuantizationInfo qinfo_out)
{
// Create reference
- SimpleTensor<T> src{ shape, data_type };
+ SimpleTensor<T> src{ shape, data_type, 1, qinfo };
+ SimpleTensor<T> dst{ shape, data_type, 1, qinfo_out };
// Fill reference
fill(src, 0, data_type);
- return reference::elementwise_unary<T>(src, _op);
+ return reference::elementwise_unary<T>(src, dst, _op);
}
TensorType _target{};
SimpleTensor<T> _reference{};
ElementWiseUnary _op{};
bool _use_dynamic_shape{ false };
+ QuantizationInfo _input_qinfo{};
+ QuantizationInfo _output_qinfo{};
+};
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class RsqrtQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(const TensorShape &shape, DataType data_type, QuantizationInfo qinfo, QuantizationInfo qinfo_out)
+ {
+ ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::RSQRT, false, qinfo, qinfo_out);
+ }
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class RsqrtValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type)
{
ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::RSQRT);
@@ -208,7 +296,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class RsqrtDynamicShapeValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type)
{
ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::RSQRT, true);
@@ -219,7 +306,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ExpValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type)
{
ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::EXP);
@@ -227,10 +313,19 @@ public:
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ExpQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(const TensorShape &shape, DataType data_type, QuantizationInfo iq, QuantizationInfo oq)
+ {
+ ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::EXP, false, iq, oq);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class NegValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type)
{
ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::NEG);
@@ -238,10 +333,19 @@ public:
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class NegQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(const TensorShape &shape, DataType data_type, QuantizationInfo iq, QuantizationInfo oq)
+ {
+ ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::NEG, false, iq, oq);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class NegValidationInPlaceFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type, bool in_place)
{
ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, in_place, ElementWiseUnary::NEG);
@@ -249,10 +353,19 @@ public:
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class NegQuantizedValidationInPlaceFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(const TensorShape &shape, DataType data_type, bool in_place, QuantizationInfo iq, QuantizationInfo oq)
+ {
+ ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, in_place, ElementWiseUnary::NEG, false, iq, oq);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class LogValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type)
{
ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::LOG);
@@ -260,10 +373,19 @@ public:
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class LogQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(const TensorShape &shape, DataType data_type, QuantizationInfo iq, QuantizationInfo oq)
+ {
+ ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::LOG, false, iq, oq);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class AbsValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type)
{
ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::ABS);
@@ -271,10 +393,19 @@ public:
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class AbsQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(const TensorShape &shape, DataType data_type, QuantizationInfo iq, QuantizationInfo oq)
+ {
+ ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::ABS, false, iq, oq);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class SinValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type)
{
ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::SIN);
@@ -282,15 +413,34 @@ public:
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class SinQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(const TensorShape &shape, DataType data_type, QuantizationInfo iq, QuantizationInfo oq)
+ {
+ ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::SIN, false, iq, oq);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class RoundValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType data_type)
{
ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::ROUND);
}
};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class RoundQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(const TensorShape &shape, DataType data_type, QuantizationInfo iq, QuantizationInfo oq)
+ {
+ ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::ROUND, false, iq, oq);
+ }
+};
} // namespace validation
} // namespace test
} // namespace arm_compute
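Passing the pre-created dst SimpleTensor into reference::elementwise_unary gives the reference access to the output quantization info, so quantized inputs can be dequantized, evaluated in float and requantized against qinfo_out. A minimal sketch of that dequantize/compute/requantize pattern for QASYMM8 (a generic illustration of the approach, not the library's actual reference code; the helper name and the saturating clamp are assumptions):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical helper: apply a unary float op to QASYMM8 data using explicit
// input/output quantization parameters (scale, zero-point).
std::vector<uint8_t> quantized_unary(const std::vector<uint8_t> &in,
                                     float in_scale, int32_t in_offset,
                                     float out_scale, int32_t out_offset,
                                     float (*op)(float))
{
    std::vector<uint8_t> out(in.size());
    for(std::size_t i = 0; i < in.size(); ++i)
    {
        const float   x = (static_cast<int32_t>(in[i]) - in_offset) * in_scale;           // dequantize
        const float   y = op(x);                                                          // e.g. expf
        const int32_t q = static_cast<int32_t>(std::lround(y / out_scale)) + out_offset;  // requantize
        out[i] = static_cast<uint8_t>(std::min<int32_t>(255, std::max<int32_t>(0, q)));   // saturate
    }
    return out;
}

// Example call with made-up quantization parameters:
//   quantized_unary(data, 0.25f, 128, 0.1f, 20, [](float v) { return std::exp(v); });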
diff --git a/tests/validation/fixtures/FFTFixture.h b/tests/validation/fixtures/FFTFixture.h
index a70335b6f1..024227b22a 100644
--- a/tests/validation/fixtures/FFTFixture.h
+++ b/tests/validation/fixtures/FFTFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,7 +47,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class FFTValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type)
{
_target = compute_target(shape, data_type);
@@ -134,7 +133,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class FFTConvolutionValidationGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation,
DataType data_type, DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false)
{
@@ -202,6 +200,8 @@ protected:
TensorType bias = create_tensor<TensorType>(bias_shape, _data_type, 1, QuantizationInfo(), _data_layout);
TensorType dst = create_tensor<TensorType>(output_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+ add_padding_x({ &src, &weights, &bias, &dst }, _data_layout);
+
// Create and configure function
FunctionType conv;
conv.configure(&src, &weights, &bias, &dst, info, act_info, _data_type == DataType::F16);
@@ -211,8 +211,6 @@ protected:
ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
- add_padding_x({ &src, &weights, &bias, &dst }, _data_layout);
-
// Allocate tensors
src.allocator()->allocate();
weights.allocator()->allocate();
@@ -271,7 +269,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class FFTConvolutionValidationFixture : public FFTConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation,
DataType data_type, DataLayout data_layout, ActivationLayerInfo act_info)
{
diff --git a/tests/validation/fixtures/FillFixture.h b/tests/validation/fixtures/FillFixture.h
index 706c13565d..0239a68903 100644
--- a/tests/validation/fixtures/FillFixture.h
+++ b/tests/validation/fixtures/FillFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,7 +42,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class FillFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, DataType data_type)
{
_target = compute_target(input_shape, data_type);
diff --git a/tests/validation/fixtures/FlattenLayerFixture.h b/tests/validation/fixtures/FlattenLayerFixture.h
index b6c0dde28a..e72487c7cf 100644
--- a/tests/validation/fixtures/FlattenLayerFixture.h
+++ b/tests/validation/fixtures/FlattenLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,7 +50,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class FlattenLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type)
{
TensorShape shape_flatten;
diff --git a/tests/validation/fixtures/FloorFixture.h b/tests/validation/fixtures/FloorFixture.h
index d6e19444db..7d38666f47 100644
--- a/tests/validation/fixtures/FloorFixture.h
+++ b/tests/validation/fixtures/FloorFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class FloorValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type)
{
_target = compute_target(shape, data_type);
diff --git a/tests/validation/fixtures/FullyConnectedLayerFixture.h b/tests/validation/fixtures/FullyConnectedLayerFixture.h
index 7d767642f3..344187868f 100644
--- a/tests/validation/fixtures/FullyConnectedLayerFixture.h
+++ b/tests/validation/fixtures/FullyConnectedLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_FULLY_CONNECTED_LAYER_FIXTURE
-#define ARM_COMPUTE_TEST_FULLY_CONNECTED_LAYER_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_FULLYCONNECTEDLAYERFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_FULLYCONNECTEDLAYERFIXTURE_H
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
@@ -34,6 +34,7 @@
#include "tests/framework/Asserts.h"
#include "tests/framework/Fixture.h"
#include "tests/validation/Helpers.h"
+#include "tests/validation/Validation.h"
#include "tests/validation/reference/ActivationLayer.h"
#include "tests/validation/reference/FullyConnectedLayer.h"
#include "tests/validation/reference/Utils.h"
@@ -54,7 +55,40 @@ public:
using TBias = typename std::conditional < (std::is_same<TDecay, uint8_t>::value || std::is_same<TDecay, int8_t>::value), int32_t, T >::type;
public:
- template <typename...>
+ void setup_quantization(TensorShape weights_shape, TensorShape output_shape, QuantizationInfo &input_q_info, QuantizationInfo &weights_q_info, DataType data_type)
+ {
+ _hash = weights_shape[0] + weights_shape[1] + output_shape[0] + output_shape[1];
+ const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+ const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+ std::mt19937 generator(library->seed() + _hash);
+ std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f);
+ std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+ const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const int32_t offset_lhs = distribution_t(generator);
+ const int32_t offset_rhs = distribution_t(generator);
+
+ input_q_info = QuantizationInfo(scale_lhs, offset_lhs);
+ weights_q_info = QuantizationInfo(scale_rhs, offset_rhs);
+
+
+ const int k = weights_shape.x();
+ QuantizationHint q_hint = suggest_mac_dst_q_info_and_bias(input_q_info, weights_q_info, k, data_type, 0.1f /* bias_fraction */, 4 /* number of standard deviations*/);
+
+ _dst_q_info = q_hint.q_info;
+ _min_bias = q_hint.bias_min;
+ _max_bias = q_hint.bias_max;
+
+ // Do not change these limits here; they are the natural limits of the associated data types and
+ // are embedded in the computation of the dst quantization info.
+ _min_u8 = 0;
+ _max_u8 = 255;
+ _min_s8 = -128;
+ _max_s8 = 127;
+ }
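setup_quantization seeds a generator from the shape-derived _hash and draws the input and weight quantization parameters: scales are 2^u with u uniform in [-5, 3] (so scales land in [2^-5, 2^3]) and offsets are uniform over the full range of T; the destination info and bias limits then come from suggest_mac_dst_q_info_and_bias. A standalone sketch of just the scale/offset draw (illustrative only; the helper call and member assignments stay as in the fixture):

#include <cmath>
#include <cstdint>
#include <limits>
#include <random>
#include <utility>

// Sketch: draw one (scale, offset) pair the same way as setup_quantization above,
// with the generator seeded from a shape-derived hash for reproducibility per shape.
template <typename T>
std::pair<float, int32_t> draw_qparams(std::mt19937 &gen)
{
    std::uniform_real_distribution<float>  exponent(-5.0f, 3.0f);
    std::uniform_int_distribution<int32_t> offset(static_cast<int32_t>(std::numeric_limits<T>::min()),
                                                  static_cast<int32_t>(std::numeric_limits<T>::max()));
    const float scale = std::pow(2.0f, exponent(gen)); // scale in [2^-5, 2^3]
    return { scale, offset(gen) };
}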
+
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights,
DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo activation_info, bool mixed_layout = false)
{
@@ -64,7 +98,20 @@ public:
_mixed_layout = mixed_layout;
_data_type = data_type;
_bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
- _quantization_info = quantization_info;
+
+ // Note: the QuantizationInfo parameter of setup() is only used when the data type is not quantized, or when a non-identity activation is fused; otherwise setup_quantization() generates the quantization info.
+ if(is_data_type_quantized(data_type) && (!activation_info.enabled() || activation_info.activation() == ActivationFunction::IDENTITY))
+ {
+ // Initialises quantization info with appropriate scale and offset for given input shapes.
+ setup_quantization(weights_shape, output_shape,_input_q_info, _weight_q_info, data_type);
+ }
+ else
+ {
+ _input_q_info = quantization_info;
+ _weight_q_info = quantization_info;
+ _dst_q_info = quantization_info;
+ }
+
_activation_info = activation_info;
_target = compute_target(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights);
@@ -92,17 +139,17 @@ protected:
{
if(_data_type == DataType::QASYMM8)
{
- std::uniform_int_distribution<uint8_t> distribution(0, 30);
+ std::uniform_int_distribution<uint32_t> distribution(_min_u8, _max_u8);
library->fill(tensor, distribution, i);
}
else if(_data_type == DataType::QASYMM8_SIGNED)
{
- std::uniform_int_distribution<int8_t> distribution(-15, 15);
+ std::uniform_int_distribution<int32_t> distribution(_min_s8, _max_s8);
library->fill(tensor, distribution, i);
}
else if(_data_type == DataType::S32)
{
- std::uniform_int_distribution<int32_t> distribution(-50, 50);
+ std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias);
library->fill(tensor, distribution, i);
}
else if(_data_type == DataType::F16)
@@ -144,10 +191,10 @@ protected:
}
// Create tensors
- TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, _quantization_info);
- TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _data_type, 1, _quantization_info);
- TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, _quantization_info);
- TensorType dst = create_tensor<TensorType>(output_shape, _data_type, 1, _quantization_info);
+ TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, _input_q_info);
+ TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _data_type, 1, _weight_q_info);
+ TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1);
+ TensorType dst = create_tensor<TensorType>(output_shape, _data_type, 1, _dst_q_info);
// Create Fully Connected layer info
FullyConnectedLayerInfo fc_info;
@@ -178,8 +225,8 @@ protected:
ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
- fill(AccessorType(src), 0);
- fill(AccessorType(bias), 2);
+ fill(AccessorType(src), 0 + _hash);
+ fill(AccessorType(bias), 2 + _hash);
if(!reshape_weights || !transpose_weights)
{
@@ -187,7 +234,7 @@ protected:
RawTensor tmp(tmp_shape, _data_type, 1);
// Fill with original shape
- fill(tmp, 1);
+ fill(tmp, 1 + _hash);
// Transpose elementwise
tmp = transpose(tmp);
@@ -204,7 +251,7 @@ protected:
}
else
{
- fill(AccessorType(weights), 1);
+ fill(AccessorType(weights), 1 + _hash);
}
if(_mixed_layout)
@@ -223,16 +270,16 @@ protected:
SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape)
{
// Create reference
- SimpleTensor<T> src{ input_shape, _data_type, 1, _quantization_info };
- SimpleTensor<T> weights{ weights_shape, _data_type, 1, _quantization_info };
- SimpleTensor<TBias> bias{ bias_shape, _bias_data_type, 1, _quantization_info };
+ SimpleTensor<T> src{ input_shape, _data_type, 1, _input_q_info };
+ SimpleTensor<T> weights{ weights_shape, _data_type, 1, _weight_q_info };
+ SimpleTensor<TBias> bias{ bias_shape, _bias_data_type, 1, QuantizationInfo() };
// Fill reference
- fill(src, 0);
- fill(weights, 1);
- fill(bias, 2);
+ fill(src, 0 + _hash);
+ fill(weights, 1 + _hash);
+ fill(bias, 2 + _hash);
- return reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, output_shape), _activation_info, _quantization_info);
+ return reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, output_shape, _dst_q_info), _activation_info, _dst_q_info);
}
TensorType _target{};
@@ -240,15 +287,28 @@ protected:
DataType _data_type{};
DataType _bias_data_type{};
bool _mixed_layout{ false };
- QuantizationInfo _quantization_info{};
+ QuantizationInfo _input_q_info{};
+ QuantizationInfo _weight_q_info{};
+ QuantizationInfo _dst_q_info{};
ActivationLayerInfo _activation_info{};
+
+ // Random initialization limits
+ // Default values are the previously handcrafted limits
+ // that should be used when dynamic quantization is not used
+ int32_t _min_bias{-50};
+ int32_t _max_bias{50};
+
+ int32_t _min_u8{0};
+ int32_t _max_u8{30};
+ int32_t _min_s8{-15};
+ int32_t _max_s8{15};
+ int _hash{0};
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
class FullyConnectedLayerValidationFixture : public FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type,
ActivationLayerInfo activation_info)
{
@@ -262,7 +322,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class FullyConnectedLayerValidationQuantizedFixture : public FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type,
QuantizationInfo quantization_info, ActivationLayerInfo activation_info)
{
@@ -273,7 +332,7 @@ public:
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class FullyConnectedWithDynamicWeightsFixture : public framework::Fixture
+class FullyConnectedWithDynamicTensorsFixture : public framework::Fixture
{
private:
template <typename U>
@@ -289,6 +348,21 @@ private:
std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
library->fill(tensor, distribution, i);
}
+ else if(_data_type == DataType::QASYMM8)
+ {
+ std::uniform_int_distribution<uint32_t> distribution(_min_u8, _max_u8);
+ library->fill(tensor, distribution, i);
+ }
+ else if(_data_type == DataType::QASYMM8_SIGNED)
+ {
+ std::uniform_int_distribution<int32_t> distribution(_min_s8, _max_s8);
+ library->fill(tensor, distribution, i);
+ }
+ else if(_data_type == DataType::S32)
+ {
+ std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias);
+ library->fill(tensor, distribution, i);
+ }
else
{
library->fill_tensor_uniform(tensor, i);
@@ -316,82 +390,198 @@ private:
}
}
- void validate_with_tolerance(TensorType &target, SimpleTensor<T> &ref)
+ void validate_with_tolerance(TensorType &target, SimpleTensor<float> &ref)
{
- if(_data_type == DataType::F32)
- {
- constexpr RelativeTolerance<float> rel_tolerance_f32(0.05f);
- constexpr AbsoluteTolerance<float> abs_tolerance_f32(0.0001f);
- validate(AccessorType(target), ref, rel_tolerance_f32, 0, abs_tolerance_f32);
- }
- else
- {
- validate(AccessorType(target), ref);
- }
+ constexpr RelativeTolerance<float> rel_tolerance_f32(0.01f);
+ constexpr AbsoluteTolerance<float> abs_tolerance_f32(0.001f);
+ validate(AccessorType(target), ref, rel_tolerance_f32, 0, abs_tolerance_f32);
+ }
+
+ void validate_with_tolerance(TensorType &target, SimpleTensor<half_float::half> &ref)
+ {
+ constexpr AbsoluteTolerance<float> abs_tolerance_f16(0.3f);
+ const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f));
+ constexpr float tolerance_num_f16 = 0.07f;
+
+ validate(AccessorType(target), ref, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16);
+ }
+
+ void validate_with_tolerance(TensorType &target, SimpleTensor<uint8_t> &ref)
+ {
+ constexpr AbsoluteTolerance<uint32_t> tolerance_qasymm8(1);
+ validate(AccessorType(target), ref, tolerance_qasymm8);
+ }
+
+ void validate_with_tolerance(TensorType &target, SimpleTensor<int8_t> &ref)
+ {
+ constexpr AbsoluteTolerance<int32_t> tolerance_qasymm8_signed(1);
+ validate(AccessorType(target), ref, tolerance_qasymm8_signed);
+ }
+
+ void setup_quantization(TensorShape weights_shape, TensorShape output_shape, QuantizationInfo &input_q_info, QuantizationInfo &weights_q_info, DataType data_type)
+ {
+ _hash = weights_shape[0] + weights_shape[1] + output_shape[0] + output_shape[1];
+
+ const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+ const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+ std::mt19937 generator(library->seed() + _hash);
+ std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f);
+ std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+ const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const int32_t offset_lhs = distribution_t(generator);
+ const int32_t offset_rhs = distribution_t(generator);
+
+ input_q_info = QuantizationInfo(scale_lhs, offset_lhs);
+ weights_q_info = QuantizationInfo(scale_rhs, offset_rhs);
+
+ const int k = weights_shape.x();
+ QuantizationHint q_hint = suggest_mac_dst_q_info_and_bias(input_q_info, weights_q_info, k, data_type, 0.1f /* bias_fraction */, 4 /* number of standard deviations*/);
+
+ _dst_q_info = q_hint.q_info;
+ _min_bias = q_hint.bias_min;
+ _max_bias = q_hint.bias_max;
+
+ // Do not change these values; they are the natural limits of the associated data types and
+ // are embedded in the computation of the dst quantization info.
+ _min_u8 = 0;
+ _max_u8 = 255;
+ _min_s8 = -128;
+ _max_s8 = 127;
}
public:
- template <typename...>
+ using TDecay = typename std::decay<T>::type;
+ using TBias = typename std::conditional < (std::is_same<TDecay, uint8_t>::value || std::is_same<TDecay, int8_t>::value), int32_t, T >::type;
+
void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape dst_shape,
- DataType data_type, ActivationLayerInfo activation_info)
+ DataType data_type, ActivationLayerInfo activation_info, bool constant_weights, bool constant_bias, bool weights_reshaped, bool remove_bias = false)
{
_data_type = data_type;
- // Setup tensor meta-data
- TensorInfo src_info(src_shape, 1, data_type);
- _src.allocator()->init(src_info);
+ const bool is_quantized = is_data_type_quantized(data_type);
+ const DataType bias_data_type = (is_quantized) ? DataType::S32 : data_type;
- TensorShape tr_weights_shape{ weights_shape[1], weights_shape[0] };
- TensorInfo wei_info(tr_weights_shape, 1, data_type);
- _weights.allocator()->init(wei_info);
+ if (is_quantized && (!activation_info.enabled() || activation_info.activation() == ActivationFunction::IDENTITY))
+ {
+ setup_quantization(weights_shape, dst_shape, _src_q_info, _weights_q_info, data_type);
+ }
+ else
+ {
+ _src_q_info = QuantizationInfo(0.1f, 10);
+ _dst_q_info = QuantizationInfo(0.3f, 20);
+ _weights_q_info = QuantizationInfo(0.2f, 5);
+ }
- TensorInfo bias_info(bias_shape, 1, data_type);
- _bias.allocator()->init(bias_info);
+ // Configure TensorInfo Objects
+ const TensorInfo src_info(src_shape, 1, data_type, _src_q_info);
+ const TensorInfo dst_info(dst_shape, 1, data_type, _dst_q_info);
+ TensorInfo bias_info(bias_shape, 1, bias_data_type);
+ TensorInfo wei_info(weights_shape, 1, data_type, _weights_q_info);
- TensorInfo dst_info(dst_shape, 1, data_type);
+ if(!constant_weights && weights_reshaped)
+ {
+ const TensorShape tr_weights_shape{ weights_shape[1], weights_shape[0] };
+ wei_info.set_tensor_shape(tr_weights_shape);
+ }
+ wei_info.set_are_values_constant(constant_weights);
+ bias_info.set_are_values_constant(constant_bias);
+
+ // Initialise Tensors
+ _src.allocator()->init(src_info);
+ _weights.allocator()->init(wei_info);
+ if(!remove_bias)
+ _bias.allocator()->init(bias_info);
_dst.allocator()->init(dst_info);
// Configure FC layer and mark the weights as non constant
FullyConnectedLayerInfo fc_info;
- fc_info.activation_info = activation_info;
- fc_info.are_weights_reshaped = true;
- fc_info.transpose_weights = false;
- fc_info.constant_weights = false;
+ fc_info.activation_info = activation_info;
+ if(!constant_weights)
+ {
+ fc_info.are_weights_reshaped = weights_reshaped;
+ fc_info.transpose_weights = !weights_reshaped;
+ }
FunctionType fc;
- fc.configure(&_src, &_weights, &_bias, &_dst, fc_info);
+ fc.configure(&_src, &_weights, (remove_bias) ? nullptr : &_bias, &_dst, fc_info);
// Allocate all the tensors
_src.allocator()->allocate();
_weights.allocator()->allocate();
- _bias.allocator()->allocate();
+ if(!remove_bias)
+ _bias.allocator()->allocate();
_dst.allocator()->allocate();
// Run multiple iterations with different inputs
constexpr int num_iterations = 5;
int randomizer_offset = 0;
+
+ // Create reference tensors
+ SimpleTensor<T> src{ src_shape, data_type, 1, _src_q_info };
+ SimpleTensor<T> weights{ weights_shape, data_type, 1, _weights_q_info };
+ SimpleTensor<TBias> bias{ bias_shape, bias_data_type };
+
+ // Fill weights and/or bias if they remain constant
+ if(constant_weights)
+ {
+ fill(AccessorType(_weights), 1 + _hash);
+ fill(weights, 1 + _hash);
+ }
+ if(constant_bias && !remove_bias)
+ {
+ fill(AccessorType(_bias), 2 + _hash);
+ fill(bias, 2 + _hash);
+ }
+ // To remove bias, fill with 0
+ if(remove_bias && is_quantized)
+ {
+ library->fill_tensor_value(bias, 0);
+ }
+ else if(remove_bias)
+ {
+ library->fill_tensor_value(bias, (float)0.0);
+ }
+
for(int i = 0; i < num_iterations; ++i)
{
// Run target
{
fill(AccessorType(_src), randomizer_offset);
- fill_transposed_weights(_weights, weights_shape, randomizer_offset + 1);
- fill(AccessorType(_bias), randomizer_offset + 2);
+ if(!constant_weights)
+ {
+ if(weights_reshaped)
+ {
+ fill_transposed_weights(_weights, weights_shape, randomizer_offset + 1 + _hash);
+ }
+ else
+ {
+ fill(AccessorType(_weights), randomizer_offset + 1 +_hash);
+ }
+ }
+ if(!constant_bias && !remove_bias)
+ {
+ fill(AccessorType(_bias), randomizer_offset + 2 + _hash);
+ }
fc.run();
}
// Run reference and compare
{
- SimpleTensor<T> src{ src_shape, data_type };
- SimpleTensor<T> weights{ weights_shape, data_type };
- SimpleTensor<T> bias{ bias_shape, data_type };
-
// Fill reference
fill(src, randomizer_offset);
- fill(weights, randomizer_offset + 1);
- fill(bias, randomizer_offset + 2);
+ if(!constant_weights)
+ {
+ fill(weights, randomizer_offset + 1 + _hash);
+ }
+ if(!constant_bias && !remove_bias)
+ {
+ fill(bias, randomizer_offset + 2 + _hash);
+ }
- auto dst = reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, dst_shape), activation_info);
+ auto dst = reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, dst_shape, _dst_q_info), activation_info, _dst_q_info);
// Validate
validate_with_tolerance(_dst, dst);
@@ -404,8 +594,60 @@ public:
private:
TensorType _src{}, _weights{}, _bias{}, _dst{};
DataType _data_type{ DataType::UNKNOWN };
+
+ QuantizationInfo _src_q_info{};
+ QuantizationInfo _weights_q_info{};
+ QuantizationInfo _dst_q_info{};
+
+ // Random initialization limits
+ // Default values are the previously handcrafted limits
+ // that should be used when dynamic quantization is not used
+ int32_t _min_bias{-50};
+ int32_t _max_bias{50};
+
+ int32_t _min_u8{0};
+ int32_t _max_u8{30};
+ int32_t _min_s8{-15};
+ int32_t _max_s8{15};
+ int _hash{0};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class FullyConnectedWithDynamicWeightsFixture : public FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape dst_shape,
+ DataType data_type, ActivationLayerInfo activation_info, bool weights_reshaped)
+ {
+ FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, weights_shape, bias_shape,
+ dst_shape, data_type, activation_info, false, true, weights_reshaped, false);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class FullyConnectedDynamicNoBiasFixture : public FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape dst_shape,
+ DataType data_type, ActivationLayerInfo activation_info, bool weights_reshaped)
+ {
+ FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, weights_shape, bias_shape,
+ dst_shape, data_type, activation_info, false, true, weights_reshaped, true);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class FullyConnectedWithDynamicBiasFixture : public FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape dst_shape,
+ DataType data_type, ActivationLayerInfo activation_info)
+ {
+ FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, weights_shape, bias_shape,
+ dst_shape, data_type, activation_info, true, false, false, false);
+ }
};
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_FULLY_CONNECTED_LAYER_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_FULLYCONNECTEDLAYERFIXTURE_H
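Aside: the dynamic quantization path introduced above picks scales and offsets at random but deterministically per test configuration: scales are powers of two drawn from [2^-5, 2^3], offsets span the full range of the quantized type, and the generator is seeded from a hash built out of the tensor shapes. The standalone sketch below mirrors that sampling with standard-library code only; the helper name pick_quantization, the fixed seed, and the int8 range are illustrative assumptions, not library API.

// Standalone sketch of the shape-seeded scale/offset sampling (illustrative names, not library API).
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>
#include <random>

struct SimpleQInfo
{
    float   scale;
    int32_t offset;
};

// Mirrors the idea of setup_quantization(): scale in [2^-5, 2^3], offset across the full
// range of the quantized type (int8 shown here), both seeded from a hash of the tensor
// dimensions so reruns of the same configuration are reproducible.
SimpleQInfo pick_quantization(uint32_t base_seed, int w0, int w1, int o0, int o1)
{
    const int hash = w0 + w1 + o0 + o1;
    std::mt19937 generator(base_seed + static_cast<uint32_t>(hash));

    std::uniform_real_distribution<float>  exp_dist(-5.0f, 3.0f);
    std::uniform_int_distribution<int32_t> off_dist(std::numeric_limits<int8_t>::min(),
                                                    std::numeric_limits<int8_t>::max());

    return SimpleQInfo{ std::pow(2.0f, exp_dist(generator)), off_dist(generator) };
}

int main()
{
    // Example: weights 32x16, output 8x16 -> same hash, hence the same qinfo on every run.
    const SimpleQInfo q = pick_quantization(1234u /* illustrative seed */, 32, 16, 8, 16);
    std::cout << "scale=" << q.scale << " offset=" << q.offset << "\n";
    return 0;
}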
diff --git a/tests/validation/fixtures/FuseBatchNormalizationFixture.h b/tests/validation/fixtures/FuseBatchNormalizationFixture.h
index 6fbabeee56..a05e4169a7 100644
--- a/tests/validation/fixtures/FuseBatchNormalizationFixture.h
+++ b/tests/validation/fixtures/FuseBatchNormalizationFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,7 +47,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, int
class FuseBatchNormalizationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape_w, DataType data_type, DataLayout data_layout, bool in_place, bool with_bias, bool with_gamma, bool with_beta)
{
std::tie(_target_w, _target_b) = compute_target(shape_w, data_type, data_layout, in_place, with_bias, with_gamma, with_beta);
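Aside: many hunks in this patch simply drop an unused template <typename...> wrapper from the fixtures' setup() methods. Because the parameter pack was never deduced or used, removing it leaves every call site unchanged; a minimal sketch, with a hypothetical ExampleFixture type that is not part of the library, illustrates why.

#include <cassert>

struct ExampleFixture
{
    // Before the patch this member was declared as:
    //   template <typename...> void setup(int value);
    // The pack was never used, so a plain member function behaves identically.
    void setup(int value) { stored = value; }
    int stored{0};
};

int main()
{
    ExampleFixture fixture;
    fixture.setup(42); // identical call syntax with or without the empty template header
    assert(fixture.stored == 42);
    return 0;
}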
diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h
index c118da66ae..94bedc83e1 100644
--- a/tests/validation/fixtures/GEMMFixture.h
+++ b/tests/validation/fixtures/GEMMFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_GEMM_FIXTURE
-#define ARM_COMPUTE_TEST_GEMM_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorShape.h"
@@ -34,6 +34,7 @@
#include "tests/framework/Fixture.h"
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/ElementwiseOperations.h"
#include "tests/validation/reference/GEMM.h"
#include <random>
@@ -44,16 +45,15 @@ namespace test
{
namespace validation
{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false>
-class GEMMValidationFixture : public framework::Fixture
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMGenericValidationFixture : public framework::Fixture
{
public:
- template <typename...>
- void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type, bool accumulate=false)
{
ARM_COMPUTE_UNUSED(pretranspose);
- _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type);
- _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type);
+ _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type, accumulate);
+ _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type, accumulate);
}
protected:
@@ -80,7 +80,7 @@ protected:
}
TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &output_shape, float alpha, float beta,
- DataType data_type)
+ DataType data_type, bool accumulate=false)
{
// Create tensors
TensorType a = create_tensor<TensorType>(shape_a, data_type, 1);
@@ -98,8 +98,8 @@ protected:
(disable_c) ? nullptr : &c,
&dst,
alpha, beta,
- GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, (reinterpret_input_as_3d
- || reinterpret_output_as_3d)));
+ GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, false, (reinterpret_input_as_3d
+ || reinterpret_output_as_3d), arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED, false /* pretranspose_B */, accumulate));
ARM_COMPUTE_ASSERT(a.info()->is_resizable());
ARM_COMPUTE_ASSERT(b.info()->is_resizable());
ARM_COMPUTE_ASSERT(c.info()->is_resizable());
@@ -121,10 +121,25 @@ protected:
// Fill tensors
fill(AccessorType(a), 0);
fill(AccessorType(b), 1);
+ if (accumulate)
+ {
+ fill(AccessorType(dst), 6);
+ }
if(!disable_c)
{
fill(AccessorType(c), 2);
}
+ // Run with variable inputs.
+ if(run_twice)
+ {
+ gemm.run();
+ fill(AccessorType(a), 3); // Fill tensors with new seed after run
+ fill(AccessorType(b), 4);
+ if(!disable_c)
+ {
+ fill(AccessorType(c), 5);
+ }
+ }
// Compute GEMM function
gemm.run();
@@ -133,10 +148,9 @@ protected:
}
SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, float alpha, float beta,
- DataType data_type)
+ DataType data_type, bool accumulate=false)
{
TensorShape shape_a_to_use = shape_a;
-
if(reinterpret_input_as_3d)
{
// Collapse the second and third dimension if the input is 3D
@@ -147,6 +161,7 @@ protected:
SimpleTensor<T> a{ shape_a_to_use, data_type, 1 };
SimpleTensor<T> b{ shape_b, data_type, 1 };
SimpleTensor<T> c{ output_shape, data_type, 1 };
+ SimpleTensor<T> dst{ output_shape, data_type, 1 };
// Fill reference
fill(a, 0);
@@ -159,27 +174,96 @@ protected:
const int m = reinterpret_output_as_3d ? output_shape[1] * output_shape[2] : output_shape[1];
const int batch_size = reinterpret_output_as_3d ? output_shape[3] : output_shape[2];
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(c.data() + i * n, c.data(), n * sizeof(T));
}
}
+ /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M),
+ therefore, A must be pre-transposed before passing it to the fixture. The fixture then transposes A again to make it (B x M x K)
+ in order to be able to call the reference implementation, which works with (B x M x K) input.
+ Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K) and must be pre-transposed before passing it to the fixture. */
+
+ // Define transposed shapes
+ TensorShape a_transposed_shape(a.shape().y(), a.shape().x());
+ TensorShape b_transposed_shape(b.shape().y(), b.shape().x());
+
+ // Define transposed tensors
+ SimpleTensor<T> a_transposed{ a_transposed_shape, data_type };
+ SimpleTensor<T> b_transposed{ b_transposed_shape, data_type };
+
+ // pretranspose a if necessary
+ if(pretranspose_a)
+ {
+ transpose_matrix<T>(a, a_transposed);
+ }
+
+ // pretranspose b if necessary
+ if(pretranspose_b)
+ {
+ transpose_matrix<T>(b, b_transposed);
+ }
+
+ // Run with variable inputs.
+ if(run_twice)
+ {
+ reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
+ fill((pretranspose_a) ? a_transposed : a, 3);
+ fill((pretranspose_b) ? b_transposed : b, 4);
+ fill(c, 5);
+ }
+
+ // Do in place summation
+ if (accumulate)
+ {
+ fill(dst, 6);
+ }
+
// Setting beta to 0 will effectively disable C for the
// computation of the reference: alpha * A * B + 0 * C
- return reference::gemm<T>(a, b, c, alpha, disable_c ? 0.f : beta);
+ // Use transposed tensors if boolean enabled else use original tensors
+ if (accumulate)
+ {
+ reference::gemm_accumulate<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta, dst);
+ return dst;
+ }
+ else
+ {
+ return reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
+ }
}
TensorType _target{};
SimpleTensor<T> _reference{};
};
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+ {
+ GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, false /*accumulate*/);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMAccumulateValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+ {
+ bool accumulate = true;
+ GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, accumulate);
+ }
+};
+
template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiplyValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info,
DataType data_type, GPUTarget gpu_arch)
{
@@ -255,8 +339,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
{ ACL_SRC_1, &rhs },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -285,7 +368,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -303,7 +386,6 @@ template <typename TensorType, typename AccessorType, typename T, typename GEMMO
class GEMMMatrixMultiply3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision,
const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
{
@@ -378,8 +460,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
{ ACL_SRC_1, &rhs },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -408,7 +489,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -425,7 +506,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
class GEMMMatrixMultiplyInterleavedTransposedValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias, bool fp16_mixed_precision,
const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
{
@@ -533,8 +613,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
{ ACL_SRC_1, &rhs_reshaped },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -563,7 +642,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -581,7 +660,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
class GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias,
bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
{
@@ -688,8 +766,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
{ ACL_SRC_1, &rhs_reshaped },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -718,7 +795,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -735,7 +812,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
class GEMMMatrixMultiplyReshapedValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info)
{
@@ -863,8 +939,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
{ ACL_SRC_1, &rhs_reshaped },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -893,7 +968,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -919,7 +994,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
class GEMMMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info)
{
@@ -1044,8 +1118,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
{ ACL_SRC_1, &rhs_reshaped },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -1074,7 +1147,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1099,7 +1172,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
{
@@ -1216,8 +1288,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
{ ACL_SRC_1, &rhs_reshaped },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -1246,7 +1317,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1265,7 +1336,6 @@ template <typename TensorType, typename AccessorType, typename T, typename Resha
class GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, bool has_pad_y, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info)
{
@@ -1387,8 +1457,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
{ ACL_SRC_1, &rhs_reshaped },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -1417,7 +1486,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1435,7 +1504,6 @@ template <typename TensorType, typename AccessorType, typename T, typename GEMMO
class GEMMMatrixMultiplyNativeValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias,
const ActivationLayerInfo &act_info)
{
@@ -1524,8 +1592,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
{ ACL_SRC_1, &rhs },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -1554,7 +1621,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1572,7 +1639,6 @@ template <typename TensorType, typename AccessorType, typename T, typename GEMMO
class GEMMMatrixMultiplyNative3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta,
const ActivationLayerInfo &act_info)
{
@@ -1660,8 +1726,7 @@ protected:
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
{ ACL_SRC_1, &rhs },
{ ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ { ACL_DST, &dst } });
gemm.run(gemm_pack);
return dst;
@@ -1690,7 +1755,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1703,7 +1768,170 @@ protected:
SimpleTensor<T> _reference{};
};
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
+class GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture : public framework::Fixture
+{
+public:
+ void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, bool export_to_cl_image, DataType data_type, float alpha,
+ float beta, bool broadcast_bias,
+ const ActivationLayerInfo &act_info)
+ {
+ GEMMLHSMatrixInfo lhs_info;
+ lhs_info.m0 = m0;
+ lhs_info.k0 = k0;
+
+ GEMMRHSMatrixInfo rhs_info;
+ rhs_info.n0 = n0;
+ rhs_info.k0 = k0;
+ rhs_info.interleave = true;
+ rhs_info.transpose = false;
+ rhs_info.h0 = 4;
+ rhs_info.export_to_cl_image = export_to_cl_image;
+
+ // Set the tensor shapes for LHS and RHS matrices
+ const TensorShape lhs_shape(k, m, batch_size);
+ const TensorShape rhs_shape(n, k, batch_size);
+ const TensorShape bias_shape(n,
+ broadcast_bias ? 1 : m,
+ broadcast_bias ? 1 : batch_size);
+
+ _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
+ _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
+ library->fill(tensor, distribution, i);
+
+ // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
+ DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
+ library->fill_borders_with_garbage(tensor, distribution_inf, i);
+ }
+
+ TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+ DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
+ {
+ // Create tensors
+ TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
+ TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
+ TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
+ TensorType rhs_reshaped;
+ TensorType dst;
+
+ const unsigned int M = lhs_shape[1];
+ const unsigned int N = rhs_shape[0];
+ const unsigned int K = lhs_shape[0];
+ GEMMKernelInfo kernel_info;
+ kernel_info.m = M;
+ kernel_info.n = N;
+ kernel_info.k = K;
+ kernel_info.depth_output_gemm3d = 0;
+ kernel_info.reinterpret_input_as_3d = false;
+ kernel_info.broadcast_bias = broadcast_bias;
+ kernel_info.activation_info = act_info;
+
+ // Create and configure function
+ ReshapeRHSOperatorType reshape_rhs;
+ GEMMOperatorType gemm;
+
+ validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
+ if(!validate_result)
+ {
+ return nullptr;
+ }
+
+ reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+
+ validate_result = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info));
+ if(!validate_result)
+ {
+ return nullptr;
+ }
+
+ gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
+
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+ // Allocate tensors
+ lhs.allocator()->allocate();
+ rhs.allocator()->allocate();
+ rhs_reshaped.allocator()->allocate();
+ bias.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+
+ // Fill tensors
+ fill(AccessorType(lhs), 0);
+ fill(AccessorType(rhs), 1);
+ fill(AccessorType(bias), 2);
+
+ // Compute GEMM
+ ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+ reshape_rhs.run(reshape_rhs_pack);
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+ { ACL_SRC_1, &rhs_reshaped },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
+
+ return dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
+ const ActivationLayerInfo &act_info)
+ {
+ if(!validate_result)
+ return SimpleTensor<T>();
+
+ TensorShape dst_shape = lhs_shape;
+ dst_shape[0] = rhs_shape[0];
+ dst_shape[1] = lhs_shape[1];
+
+ // Create reference
+ SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
+ SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
+ SimpleTensor<T> bias{ dst_shape, data_type, 1 };
+
+ const int n = rhs_shape[0];
+ const int m = lhs_shape[1];
+ const int batch_size = lhs_shape[2];
+
+ // Fill reference
+ fill(lhs, 0);
+ fill(rhs, 1);
+ fill(bias, 2);
+
+ if(broadcast_bias)
+ {
+ // In case of broadcast, we need to simply copy the first into the following "M" ones
+ for(int i = 1; i < m * batch_size; i++)
+ {
+ memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
+ }
+ }
+
+ return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
+ }
+
+ bool validate_result = true;
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMM_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H
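Aside: GEMMAccumulateValidationFixture exercises the new accumulate flag, where the destination tensor already holds data and the GEMM result is added on top of it. A rough standalone sketch of the reference semantics being validated, dst += alpha * A * B + beta * C, is shown below with plain loops; the matrix sizes and the gemm_accumulate_ref name are illustrative only.

#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative reference for the accumulate path: dst = dst + alpha * A * B + beta * C.
// A is MxK, B is KxN, C and dst are MxN, all stored row-major.
void gemm_accumulate_ref(const std::vector<float> &a, const std::vector<float> &b, const std::vector<float> &c,
                         std::vector<float> &dst, size_t m, size_t n, size_t k, float alpha, float beta)
{
    for(size_t row = 0; row < m; ++row)
    {
        for(size_t col = 0; col < n; ++col)
        {
            float acc = 0.f;
            for(size_t i = 0; i < k; ++i)
            {
                acc += a[row * k + i] * b[i * n + col];
            }
            dst[row * n + col] += alpha * acc + beta * c[row * n + col];
        }
    }
}

int main()
{
    // 2x2 example: dst starts at 1 everywhere, so the plain GEMM result is shifted by 1.
    const std::vector<float> a{ 1.f, 2.f, 3.f, 4.f };
    const std::vector<float> b{ 1.f, 0.f, 0.f, 1.f };
    const std::vector<float> c{ 0.f, 0.f, 0.f, 0.f };
    std::vector<float>       dst(4, 1.f);

    gemm_accumulate_ref(a, b, c, dst, 2, 2, 2, 1.f, 0.f);

    for(float v : dst)
    {
        std::cout << v << ' '; // prints: 2 3 4 5
    }
    std::cout << '\n';
    return 0;
}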
diff --git a/tests/validation/fixtures/GEMMInterleave4x4Fixture.h b/tests/validation/fixtures/GEMMInterleave4x4Fixture.h
index 44dc0dddbc..59fc460869 100644
--- a/tests/validation/fixtures/GEMMInterleave4x4Fixture.h
+++ b/tests/validation/fixtures/GEMMInterleave4x4Fixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class GEMMInterleave4x4ValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(size_t x, size_t y, DataType data_type)
{
_data_type = data_type;
@@ -88,7 +87,7 @@ protected:
// Create and configure function
FunctionType f;
- f.configure(&a, &b);
+ f.configure(a.info(), b.info());
ARM_COMPUTE_ASSERT(a.info()->is_resizable());
ARM_COMPUTE_ASSERT(b.info()->is_resizable());
@@ -104,8 +103,9 @@ protected:
fill(AccessorType(a), 0);
fill(AccessorType(b), 0);
- // Compute GEMM function
- f.run();
+ // Compute GEMM interleave kernel
+ ITensorPack tensors{ { ACL_SRC, &a }, { ACL_DST, &b } };
+ f.run(tensors);
return b;
}
diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h
index 5cf210bab4..aa4eedb75d 100644
--- a/tests/validation/fixtures/GEMMLowpFixture.h
+++ b/tests/validation/fixtures/GEMMLowpFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,22 +21,20 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE
-#define ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
+#include "src/core/utils/quantization/AsymmHelpers.h"
#include "tests/validation/Helpers.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Validation.h"
#include "tests/validation/reference/GEMMLowp.h"
+#include "tests/validation/reference/ArithmeticOperations.h"
+#include "tests/validation/reference/DequantizationLayer.h"
-#include <random>
+#include <cstdint>
+#include <vector>
namespace arm_compute
{
@@ -49,84 +47,88 @@ namespace
template <typename U>
void fill(U &&tensor, int i)
{
- switch(tensor.data_type())
- {
- case DataType::QSYMM8_PER_CHANNEL:
- {
- int min_bound = 128;
- int max_bound = -127;
- for(size_t j = 0; j < tensor.quantization_info().scale().size(); j++)
- {
- std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i);
- if(bounds.first < min_bound)
- {
- min_bound = bounds.first;
- }
- if(bounds.second > max_bound)
- {
- max_bound = bounds.second;
- }
- }
- std::uniform_int_distribution<int8_t> distribution(min_bound, max_bound);
- library->fill(tensor, distribution, i);
- break;
- }
- case DataType::QASYMM8:
- {
- std::uniform_int_distribution<uint8_t> distribution(1, 254);
- library->fill(tensor, distribution, i);
- break;
- }
- case DataType::F16:
- {
- arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
- library->fill(tensor, distribution, i);
- break;
- }
- case DataType::F32:
- {
- std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
- library->fill(tensor, distribution, i);
- break;
- }
- default:
- library->fill_tensor_uniform(tensor, i);
- }
+ library->fill_tensor_uniform(tensor, i);
}
-template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false>
-TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
- GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
- QuantizationInfo b_qinfo = QuantizationInfo())
+template <typename U>
+void fill_quantized(U &&tensor, int i)
{
- // Create tensors
- DataType data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;
-
- TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1);
- TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
- TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1);
-
- a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
+ ARM_COMPUTE_ASSERT(is_data_type_quantized(tensor.data_type()));
+ library->fill_tensor_uniform(tensor, i);
+}
- if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
+template <typename U>
+void fill(U &&tensor, int i, int32_t min, int32_t max)
+{
+ if (tensor.data_type() == DataType::S32)
+ {
+ std::uniform_int_distribution<int32_t> distribution(min, max);
+ library->fill(tensor, distribution, i);
+ }
+ else if(tensor.data_type() == DataType::F32)
{
- b.info()->set_quantization_info(b_qinfo);
+ std::uniform_real_distribution<float> distribution((float)min, (float)max);
+ library->fill(tensor, distribution, i);
}
else
{
- b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
+ ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+ }
+}
+
+/** Information about how to fill tensors */
+struct TensorFillInfo
+{
+ // Bias fill range. Default values are arbitrary
+ int32_t min_bias {-20000};
+ int32_t max_bias {20000};
+
+ // Output fill range. Default values are arbitrary
+ int32_t min_output {-20000};
+ int32_t max_output {20000};
+
+ // Optional extra hash to randomize tensor filling
+ int32_t hash {0};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false, bool run_twice = false>
+TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+ const QuantizationInfo& output_qinfo, DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
+ GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo(),
+ bool accumulate = false, bool dynamic_qinfo = false, DataType data_type_output = DataType::UNKNOWN)
+{
+ ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a));
+ ARM_COMPUTE_ASSERT(data_type_a == data_type_b);
+ // If unknown, set to sensible defaults
+ if (data_type_output == DataType::UNKNOWN)
+ {
+ data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;
}
+
+ // Create tensors
+ TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1, dynamic_qinfo ? QuantizationInfo(1.0, 0, true) : a_qinfo);
+ TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1, dynamic_qinfo ? QuantizationInfo(1.0, 0, true) : b_qinfo); // GEMM output before the output stage mismatches if data_layout_output is passed here; to be investigated
+ TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1, output_qinfo /* output_qinfo will be ignored when output stage type is None */);
+
TensorType bias;
if(is_fused)
{
TensorShape bias_shape(shape_b[0]);
- bias = create_tensor<TensorType>(bias_shape, DataType::S32, 1);
+ bias = create_tensor<TensorType>(bias_shape, data_type_output == DataType::F32 ? DataType::F32 : DataType::S32, 1);
}
// Create and configure function
// The GEMMinfo includes the values of the depth in case of reinterpreted 3d input/output
FunctionType gemmlowp;
- gemmlowp.configure(&a, &b, is_fused ? &bias : nullptr, &output, GEMMInfo(false, false, false, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false, output_stage));
+ gemmlowp.configure(&a, &b, is_fused ? &bias : nullptr, &output, GEMMInfo(false, false, reshape_b_only_on_first_run, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false,
+ output_stage, false /*fp_mixed_precision*/, false /*fast_math*/, false /*broadcast_bias*/,
+ arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED,
+ false /* pretranspose_B */, accumulate));
+
+ // If the QuantizationInfo is dynamic, it needs to be settable after configure (note that we also force it to be dynamic)
+ if (dynamic_qinfo)
+ {
+ a.info()->set_quantization_info(QuantizationInfo(a_qinfo.scale(), a_qinfo.offset(), true));
+ b.info()->set_quantization_info(QuantizationInfo(b_qinfo.scale(), b_qinfo.offset(), true));
+ }
ARM_COMPUTE_ASSERT(a.info()->is_resizable());
ARM_COMPUTE_ASSERT(b.info()->is_resizable());
@@ -144,25 +146,46 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
ARM_COMPUTE_ASSERT(!output.info()->is_resizable());
// Fill tensors
- fill(AccessorType(a), 0);
- fill(AccessorType(b), 1);
+ fill_quantized(AccessorType(a), 0 + finfo.hash);
+ fill_quantized(AccessorType(b), 1 + finfo.hash);
+
+ if (accumulate)
+ {
+ ARM_COMPUTE_ASSERT(accumulate != run_twice);
+ fill(AccessorType(output), 6 + finfo.hash, finfo.min_output, finfo.max_output);
+ }
if(is_fused)
{
ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
bias.allocator()->allocate();
ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
- fill(AccessorType(bias), 2);
+ fill(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias);
}
+
+ // Run with variable inputs.
+ if(run_twice)
+ {
+ gemmlowp.run();
+ fill_quantized(AccessorType(a), 3 + finfo.hash); // Fill tensors with new seed after run
+ fill_quantized(AccessorType(b), 4 + finfo.hash);
+ if(is_fused)
+ {
+ fill(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias);
+ }
+ }
+
// Compute GEMM function
gemmlowp.run();
return output;
}
-template <bool reinterpret_input_as_3d, typename TI = uint8_t, typename TW = uint8_t>
-SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
- DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo())
+template <bool reinterpret_input_as_3d, typename TI = uint8_t, typename TW = uint8_t, bool pretranspose_A = false, bool pretranspose_B = false, bool run_twice = false>
+SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+ DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, const TensorFillInfo& finfo = TensorFillInfo())
{
+ ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a));
+ ARM_COMPUTE_ASSERT(data_type_a == data_type_b);
TensorShape shape_a_to_use = shape_a;
if(reinterpret_input_as_3d)
{
@@ -171,101 +194,269 @@ SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, con
}
// Create reference
- SimpleTensor<TI> a{ shape_a_to_use, data_type_a, 1 };
- SimpleTensor<TW> b{ shape_b, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) };
+ SimpleTensor<TI> a{ shape_a_to_use, data_type_a, 1, a_qinfo };
+ SimpleTensor<TW> b{ shape_b, data_type_b, 1, b_qinfo };
+
+ TensorShape shape_a_to_use_transposed{ shape_a_to_use };
+ TensorShape shape_b_transposed{ shape_b };
+
+ shape_a_to_use_transposed.set(0, shape_a_to_use[1]);
+ shape_a_to_use_transposed.set(1, shape_a_to_use[0]);
+ shape_b_transposed.set(0, shape_b[1]);
+ shape_b_transposed.set(1, shape_b[0]);
+
+ SimpleTensor<TI> a_transposed{ shape_a_to_use_transposed, data_type_a, 1, a_qinfo };
+ SimpleTensor<TW> b_transposed{ shape_b_transposed, data_type_b, 1, b_qinfo };
// Fill reference
- fill(a, 0);
- fill(b, 1);
- return reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>(a, b, shape_output, a_offset, b_offset);
-}
+ fill_quantized(a, 0 + finfo.hash);
+ fill_quantized(b, 1 + finfo.hash);
+
+ // Transpose reference if required
+ /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M),
+ therefore, A must be pre-transposed before passing it to the fixture. The fixture then transposes A again to make it (B x M x K)
+ in order to be able to call the reference implementation, which works with (B x M x K) input.
+ Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K) and must be pre-transposed before passing it to the fixture. */
+ if(pretranspose_A)
+ {
+ transpose_matrix<TI>(a, a_transposed);
+ }
+
+ if(pretranspose_B)
+ {
+ transpose_matrix<TW>(b, b_transposed);
+ }
+
+ // Run with variable inputs.
+ const int32_t a_offset = a_qinfo.uniform().offset;
+ const int32_t b_offset = b_qinfo.uniform().offset;
+
+ if(run_twice)
+ {
+ reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset);
+ fill_quantized((pretranspose_A) ? a_transposed : a, 3 + finfo.hash);
+ fill_quantized((pretranspose_B) ? b_transposed : b, 4 + finfo.hash);
+ }
+
+ return reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset);
}
+} // namespace
-template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false>
-class GEMMLowpMatrixMultiplyCoreValidationFixture : public framework::Fixture
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false>
+class GEMMLowpGenericMatrixMultiplyCoreValidationFixture : public framework::Fixture
{
public:
- template <typename...>
- void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, bool accumulate=false, bool dynamic_qinfo = false)
{
- _target = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset);
- _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset);
+ const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset);
+ const auto b_qinfo = QuantizationInfo(1.0f / 255, b_offset);
+ TensorFillInfo finfo;
+ _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo);
+ _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate);
}
protected:
- TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
+ TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo)
{
- return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t>(shape_a, shape_b, shape_output, a_offset, b_offset);
+ const auto output_qinfo = QuantizationInfo(); // No output stage
+ return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8, DataType::QASYMM8, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo);
}
- SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
+ SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, bool accumulate)
{
- return compute_gemmlowp_reference<reinterpret_input_as_3d>(shape_a, shape_b, shape_output, a_offset, b_offset);
+ SimpleTensor<int32_t> ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo,
+ DataType::QASYMM8, DataType::QASYMM8, finfo);
+
+ if (accumulate)
+ {
+ SimpleTensor<int32_t> output{ shape_output, DataType::S32, 1 };
+ fill(output, 6 + finfo.hash, finfo.min_output, finfo.max_output);
+ reference::arithmetic_operation<int32_t>(reference::ArithmeticOperation::ADD, output, ref_output, output, ConvertPolicy::SATURATE);
+ return output;
+ }
+
+ return ref_output;
}
TensorType _target{};
SimpleTensor<int32_t> _reference{};
};
-template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t>
-class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public framework::Fixture
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false>
+class GEMMLowpMatrixMultiplyCoreValidationFixture : protected GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>
{
public:
- template <typename...>
- void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b)
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
{
- ARM_COMPUTE_ASSERT(output_stage.type != GEMMLowpOutputStageType::NONE);
- DataType data_type_a = data_type_b == DataType::QASYMM8_SIGNED ? DataType::QASYMM8_SIGNED : DataType::QASYMM8;
+ GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>::setup(shape_a, shape_b, shape_output, a_offset, b_offset, false /* accumulate */);
+ }
+};
- if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
- {
- output_stage.is_quantized_per_channel = true;
- const size_t num_channels = shape_b[0];
- std::vector<float> scales(num_channels);
- std::uniform_real_distribution<float> distribution(0.f, 1.f);
- library->fill(scales, distribution, 0);
- output_stage.gemmlowp_multipliers.resize(num_channels);
- output_stage.gemmlowp_shifts.resize(num_channels);
- for(size_t i = 0; i < num_channels; ++i)
- {
- quantization::calculate_quantized_multiplier(scales[i], &output_stage.gemmlowp_multipliers[i], &output_stage.gemmlowp_shifts[i]);
- }
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false>
+class GEMMLowpMatrixMultiplyAccumulateValidationFixture : protected GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
+ {
+ GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>::setup(shape_a, shape_b, shape_output, a_offset, b_offset, true /* accumulate */);
+ }
+};
- _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales));
- _target = compute_target(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales));
- }
- else
- {
- _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo());
- _target = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo());
- }
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false>
+class GEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture : protected GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
+ {
+ GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>::setup(shape_a, shape_b, shape_output, a_offset, b_offset, false /* accumulate */, true /* dynamic_qinfo */);
+ }
+};
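+// Usage sketch (illustrative, added for clarity; the concrete type names below are assumptions and
+// may differ per backend): a test file would typically instantiate one of the thin wrappers above, e.g.
+//   using CLGEMMLowpFixture =
+//       GEMMLowpMatrixMultiplyCoreValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>;
+// and register it with FIXTURE_DATA_TEST_CASE over a dataset of shapes and offsets.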
+
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false>
+class GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public framework::Fixture
+{
+public:
+ /** Dynamically initialize the quantization info with saturation awareness
+ */
+ template <typename T>
+ static void setup_quantization(DataType data_type, const TensorShape& shape_a, const TensorShape& shape_b, QuantizationInfo& a_qinfo, QuantizationInfo& b_qinfo, QuantizationInfo& output_qinfo, TensorFillInfo& finfo)
+ {
+        // This hash is used to seed the random generators. Hash collisions are possible, but
+        // this is intentional: it is a very easy way to make the random value generation differ
+        // across many test configurations, which previously used the same set of values.
+ finfo.hash = shape_a[0] + shape_a[1] + shape_b[0] + shape_b[1];
+
+ const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+ const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+ std::mt19937 generator(library->seed() + finfo.hash);
+ std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f);
+ std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+ const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+
+ const int32_t offset_lhs = distribution_t(generator);
+ const int32_t offset_rhs = distribution_t(generator);
+
+ a_qinfo = QuantizationInfo(scale_lhs, offset_lhs);
+ b_qinfo = QuantizationInfo(scale_rhs, offset_rhs);
+
+ // reinterpret_input_as_3d or reinterpret_output_as_3d can be ignored, as the underlying gemm / matmul computation
+ // is equivalent to a standard 2D one with m-n-k dimensions
+ const int m = shape_a.y();
+ const int n = shape_b.x();
+ const int k = shape_a.x();
+
+        const float bias_fraction = 0.5f; // is_fused is enabled in compute_gemmlowp_target below, so the bias is included
+
+ QuantizationHint q_hint = suggest_matmul_dst_q_info_and_bias(a_qinfo, b_qinfo, m, n, k, data_type, bias_fraction);
+ output_qinfo = q_hint.q_info;
+ finfo.min_bias = q_hint.bias_min;
+ finfo.max_bias = q_hint.bias_max;
+
+ // Both target and reference implementations use negated offsets, i.e.
+ // float_val = (int_val + offset) * scale
+ // instead of
+ // float_val = (int_val - offset) * scale
+        // as usual. Therefore, after calculating the output quantization above, we
+        // negate the inputs' offsets.
+ a_qinfo = QuantizationInfo(scale_lhs, -offset_lhs);
+ b_qinfo = QuantizationInfo(scale_rhs, -offset_rhs);
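+        // Illustrative example (added for clarity, values are arbitrary): with scale = 0.5 and a
+        // conventional offset of 3, passing -3 here makes the implementation's
+        // (int_val + offset) * scale evaluate to (int_val + (-3)) * 0.5, i.e. the same value as
+        // the usual (int_val - 3) * 0.5.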
+ }
+
+ /** Initialize output stage info from quantization info */
+ static Status init_gemmlowp_output_stage_info(
+ DataType data_type,
+ const QuantizationInfo& a_qinfo,
+ const QuantizationInfo& b_qinfo,
+ const QuantizationInfo& output_qinfo,
+ GEMMLowpOutputStageType type,
+ GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(!is_data_type_quantized_asymmetric(data_type));
+
+ const UniformQuantizationInfo aq_unif = a_qinfo.uniform();
+ const UniformQuantizationInfo bq_unif = b_qinfo.uniform();
+ const UniformQuantizationInfo oq_unif = output_qinfo.uniform();
+
+ float multiplier = (aq_unif.scale * bq_unif.scale) / oq_unif.scale;
+ int32_t int_multiplier;
+ int32_t shift;
+
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ quantization::calculate_quantized_multiplier(multiplier, &int_multiplier, &shift));
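+        // Note (added, informal): calculate_quantized_multiplier() splits the real multiplier into
+        // an integer multiplier and a shift so the output stage can requantize using integer-only
+        // arithmetic; the exact fixed-point convention is the one defined by that helper.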
+
+ int32_t type_min = 0;
+ int32_t type_max = 0;
+ std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(output_qinfo, ActivationLayerInfo(), data_type);
+
+ gemmlowp_output_stage_info.gemmlowp_real_multiplier = multiplier;
+ gemmlowp_output_stage_info.gemmlowp_multiplier = int_multiplier;
+ gemmlowp_output_stage_info.gemmlowp_multipliers = { int_multiplier };
+ gemmlowp_output_stage_info.gemmlowp_shift = shift;
+ gemmlowp_output_stage_info.gemmlowp_shifts = { shift };
+ gemmlowp_output_stage_info.gemmlowp_offset = oq_unif.offset;
+ gemmlowp_output_stage_info.type = type;
+ gemmlowp_output_stage_info.gemmlowp_min_bound = type_min;
+ gemmlowp_output_stage_info.gemmlowp_max_bound = type_max;
+
+ return Status{};
+ }
+
+ /** Currently this fixture only tests the following data type configurations:
+ *
+ * 1. a and b are of the same data type
+ * 2. The data type is quantized asymmetric
+ *
+ */
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type,
+ bool reshape_b_only_on_first_run)
+ {
+ ARM_COMPUTE_ASSERT(output_stage_type != GEMMLowpOutputStageType::NONE);
+ ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type));
+
+ // Randomized dynamic quantization: randomize quantization info in a way that ensures no result saturation
+ // most of the time
+ QuantizationInfo a_qinfo;
+ QuantizationInfo b_qinfo;
+ QuantizationInfo output_qinfo;
+ TensorFillInfo finfo;
+ setup_quantization<TI>(data_type, shape_a, shape_b, a_qinfo, b_qinfo, output_qinfo, finfo);
+
+ GEMMLowpOutputStageInfo output_stage;
+ init_gemmlowp_output_stage_info(data_type, a_qinfo, b_qinfo, output_qinfo, output_stage_type, output_stage);
+
+ _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, output_stage, finfo);
+ _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type, data_type, output_stage, reshape_b_only_on_first_run, finfo);
}
protected:
- TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage,
- DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo)
+ TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const QuantizationInfo& output_qinfo,
+ DataType data_type_a, DataType data_type_b, const GEMMLowpOutputStageInfo& output_stage, bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo())
{
- return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true>(shape_a, shape_b, shape_output, a_offset, b_offset,
- output_stage, data_type_a, data_type_b, b_qinfo);
+ return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true, run_twice>(shape_a, shape_b, shape_output, a_qinfo,
+ b_qinfo, output_qinfo, data_type_a, data_type_b, output_stage, reshape_b_only_on_first_run, finfo);
}
- SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
- GEMMLowpOutputStageInfo output_stage, DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo)
+ SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+ DataType data_type_a, DataType data_type_b, const GEMMLowpOutputStageInfo& output_stage, const TensorFillInfo& finfo = TensorFillInfo())
{
- SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d, TI, TW>(shape_a, shape_b, shape_output, a_offset, b_offset, data_type_a, data_type_b, b_qinfo);
+ SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d, TI, TW, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo);
TensorShape bias_shape(shape_b[0]);
SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 };
- fill(bias, 2);
+ (run_twice) ? fill(bias, 5 + finfo.hash, finfo.min_bias, finfo.max_bias) : fill(bias, 2 + finfo.hash, finfo.min_bias, finfo.max_bias); // Fill bias with same seed as last run of gemmlowp_target
switch(output_stage.type)
{
case GEMMLowpOutputStageType::QUANTIZE_DOWN:
- return reference::gemmlowp_quantize_down_scale<int32_t, TW>(output, bias,
+ return reference::gemmlowp_quantize_down_scale<int32_t, TI>(output, bias,
output_stage.gemmlowp_offset, output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
break;
case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT:
- return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, TW>(output, bias,
+ return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, TI>(output, bias,
output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
break;
default:
@@ -277,11 +468,78 @@ protected:
SimpleTensor<TI> _reference{};
};
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false>
+class GEMMLowpDequantizedMatrixMultiplyValidationFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, bool accumulate)
+ {
+ const bool dynamic_qinfo = false;
+ const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset);
+ const auto b_qinfo = QuantizationInfo(5.0f / 255, b_offset);
+ TensorFillInfo finfo;
+ _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo);
+ _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo);
+ }
+
+protected:
+ TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo)
+ {
+ const auto output_qinfo = QuantizationInfo();
+ return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::F32);
+ }
+
+ SimpleTensor<float> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, bool accumulate, const bool dynamic_qinfo)
+ {
+ QuantizationInfo s32_ref_output_quant_info = QuantizationInfo(a_qinfo.uniform().scale * b_qinfo.uniform().scale, 0, dynamic_qinfo);
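+        // Note (added, explanatory): the raw S32 accumulator of a quantized GEMM has an effective
+        // scale of scale_a * scale_b and a zero offset, so tagging it with this QuantizationInfo
+        // lets the dequantization below produce float values comparable with the target's F32 output.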
+
+ SimpleTensor<int32_t> s32_ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, int8_t, int8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo,
+ DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, finfo);
+ s32_ref_output.quantization_info(s32_ref_output_quant_info);
+
+ SimpleTensor<float> f32_ref_output(s32_ref_output.shape(), DataType::F32);
+ f32_ref_output = reference::dequantization_layer<float, int32_t>(s32_ref_output);
+
+ if (accumulate)
+ {
+ SimpleTensor<float> output{ shape_output, DataType::F32, 1 };
+ fill(output, 6 + finfo.hash, finfo.min_output, finfo.max_output);
+ reference::arithmetic_operation<float>(reference::ArithmeticOperation::ADD, output, f32_ref_output, output, ConvertPolicy::SATURATE);
+ return output;
+ }
+
+ return f32_ref_output;
+ }
+
+ TensorType _target{};
+ SimpleTensor<float> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false>
+class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type, bool reshape_b_only_on_first_run)
+ {
+ GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice>::setup(shape_a, shape_b,
+ shape_output, output_stage_type, data_type, reshape_b_only_on_first_run);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false>
+class GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture : public GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type, bool reshape_b_only_on_first_run)
+ {
+ GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice>::setup(shape_a, shape_b, shape_output, output_stage_type, data_type, reshape_b_only_on_first_run);
+ }
+};
+
template <typename TensorType, typename AccessorType, typename FunctionType>
class GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
{
_target = compute_target(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
@@ -383,7 +641,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType>
class GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
{
_target = compute_target(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
@@ -485,7 +742,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType>
class GEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias)
{
_target = compute_target(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias);
@@ -580,7 +836,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType>
class GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias)
{
_target = compute_target(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias);
@@ -675,7 +930,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(DataType data_type, TensorShape shape, float result_real_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias)
{
_target = compute_target(data_type, shape, result_real_multiplier, result_offset, min, max, add_bias);
@@ -778,7 +1032,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType>
class GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
{
_target = compute_target(shape, result_fixedpoint_multiplier, result_shift, min, max, add_bias);
@@ -873,7 +1126,6 @@ template <typename TensorType, typename AccessorType, typename ReshapeLHSOperato
class GEMMLowpMatrixMultiplyReshapedValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
bool interleave_rhs, DataType data_type)
{
@@ -944,7 +1196,7 @@ protected:
GEMMFunctionType gemm;
reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
- gemm.configure(&lhs_reshaped, &rhs_reshaped, &dst, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K));
+ gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K));
ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
@@ -973,7 +1225,8 @@ protected:
reshape_lhs.run(reshape_lhs_pack);
ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
reshape_rhs.run(reshape_rhs_pack);
- gemm.run();
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -1023,7 +1276,6 @@ template <typename TensorType, typename AccessorType, typename ReshapeLHSOperato
class GEMMLowpMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
bool interleave_lhs, bool interleave_rhs, DataType data_type)
{
@@ -1098,7 +1350,7 @@ protected:
GEMMFunctionType gemm;
reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
- gemm.configure(&lhs_reshaped, &rhs_reshaped, &dst, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h));
+ gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h));
ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
@@ -1127,7 +1379,8 @@ protected:
reshape_lhs.run(reshape_lhs_pack);
ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
reshape_rhs.run(reshape_rhs_pack);
- gemm.run();
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -1179,7 +1432,6 @@ template <typename TensorType, typename AccessorType, typename ReshapeRHSOperato
class GEMMLowpMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0,
unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, DataType data_type)
{
@@ -1251,7 +1503,7 @@ protected:
ReshapeRHSOperatorType reshape_rhs;
GEMMFunctionType gemm;
reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
- gemm.configure(&lhs, &rhs_reshaped, &dst, gemm_info);
+ gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info);
ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
@@ -1276,7 +1528,8 @@ protected:
// Compute GEMM
ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
reshape_rhs.run(reshape_rhs_pack);
- gemm.run();
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -1317,11 +1570,372 @@ protected:
SimpleTensor<int32_t> _reference{};
};
+template <typename T, typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType, typename ReduceOperation, typename CastOperation>
+class GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageValidationFixture : public framework::Fixture
+{
+public:
+ void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0,
+ unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, bool broadcast_bias, DataType data_type)
+ {
+ GEMMLowpOutputStageInfo output_stage;
+ output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+ output_stage.output_data_type = data_type;
+ output_stage.gemmlowp_multipliers = std::vector<int32_t> { 1 };
+ output_stage.gemmlowp_shifts = std::vector<int32_t> { 1 };
+ output_stage.gemmlowp_multipliers[0] = 1;
+ output_stage.gemmlowp_shifts[0] = 1;
+ output_stage.gemmlowp_offset = 0;
+ constexpr float scale = 0.001f;
+ quantization::calculate_quantized_multiplier(scale, &output_stage.gemmlowp_multipliers[0], &output_stage.gemmlowp_shifts[0]);
+ output_stage.gemmlowp_min_bound = -100;
+ output_stage.gemmlowp_max_bound = 100;
+
+ GEMMLHSMatrixInfo lhs_info;
+ lhs_info.m0 = m0;
+ lhs_info.k0 = k0;
+
+ GEMMRHSMatrixInfo rhs_info;
+ rhs_info.n0 = n0;
+ rhs_info.k0 = k0;
+ rhs_info.h0 = h0;
+ rhs_info.interleave = interleave_rhs;
+ rhs_info.transpose = transpose_rhs;
+
+ int a_offset = 1;
+ int b_offset = 1;
+
+ // Set the tensor shapes for LHS and RHS matrices
+ const TensorShape lhs_shape(k, m, batch_size);
+ const TensorShape rhs_shape(n, k, batch_size);
+ const TensorShape bias_shape(n,
+ broadcast_bias ? 1 : m,
+ broadcast_bias ? 1 : batch_size);
+
+ _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, output_stage, a_offset, b_offset);
+ if(gemm_validated == true)
+ {
+ _reference = compute_reference(lhs_shape, rhs_shape, bias_shape, data_type, output_stage, a_offset, b_offset);
+ }
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ switch(tensor.data_type())
+ {
+ case DataType::QASYMM8:
+ {
+ // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
+ std::uniform_int_distribution<> distribution(1, 254);
+ library->fill(tensor, distribution, i);
+ }
+ break;
+ case DataType::QASYMM8_SIGNED:
+ {
+ std::uniform_int_distribution<> distribution(-127, 126);
+ library->fill(tensor, distribution, i);
+ }
+ break;
+ case DataType::S32:
+ {
+ std::uniform_int_distribution<> distribution(-10000, 10000);
+ library->fill(tensor, distribution, i);
+ }
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported data type");
+ }
+ }
+
+ TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info, DataType data_type, GEMMLowpOutputStageInfo output_stage, const int a_offset, const int b_offset)
+ {
+ // Create tensors
+ TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, a_offset));
+ TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, b_offset));
+ TensorType bias = create_tensor<TensorType>(bias_shape, DataType::S32, 1);
+ TensorType dst;
+ TensorType rhs_reshaped;
+
+ const unsigned int M = lhs_shape[1];
+ const unsigned int N = rhs_shape[0];
+ const unsigned int K = lhs_shape[0];
+
+ // Tensors for precomputing sum of lhs rows / rhs columns
+ TensorType vec_sum_rows = create_tensor<TensorType>(TensorShape(M, 1, lhs_shape[2]), DataType::S32, 1);
+ TensorType vec_sum_cols = create_tensor<TensorType>(TensorShape(N, 1, rhs_shape[2]), DataType::S32, 1);
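+        // Note (added, explanatory): the offset-contribution terms of a quantized GEMM are built
+        // from the per-row sums of the LHS and the per-column sums of the RHS, which is why both
+        // are precomputed here and passed to the kernel together with a_offset / b_offset.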
+
+ GEMMKernelInfo gemm_info;
+ gemm_info.m = M;
+ gemm_info.n = N;
+ gemm_info.k = K;
+ gemm_info.lhs_info = lhs_info;
+ gemm_info.rhs_info = rhs_info;
+ gemm_info.output_stage = output_stage;
+ gemm_info.a_offset = a_offset;
+ gemm_info.b_offset = b_offset;
+ // The output tensor will be auto-initialized within the function
+
+ // Create and configure function
+ ReshapeRHSOperatorType reshape_rhs;
+ GEMMFunctionType gemm;
+ reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+
+        // If validation fails, do not configure or run the function: validation checks whether
+        // the device supports this extension and, if it does not, the test is skipped.
+        // Running it regardless would only make the test fail, because target and
+        // reference would not match.
+ gemm_validated = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, vec_sum_cols.info(), vec_sum_rows.info(), bias.info()));
+ if(gemm_validated == true)
+ {
+ gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, vec_sum_cols.info(), vec_sum_rows.info(), bias.info());
+
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+ // Allocate tensors
+ lhs.allocator()->allocate();
+ rhs.allocator()->allocate();
+ rhs_reshaped.allocator()->allocate();
+ bias.allocator()->allocate();
+ vec_sum_cols.allocator()->allocate();
+ vec_sum_rows.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!vec_sum_cols.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!vec_sum_rows.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+
+ // Fill tensors
+ fill(AccessorType(lhs), 0);
+ fill(AccessorType(rhs), 1);
+ fill(AccessorType(bias), 2);
+
+ TensorType lhs_32 = create_tensor<TensorType>(lhs_shape, DataType::S32, 1);
+ TensorType rhs_32 = create_tensor<TensorType>(rhs_shape, DataType::S32, 1);
+ CastOperation cast_lhs;
+ CastOperation cast_rhs;
+ cast_lhs.configure(&lhs, &lhs_32, ConvertPolicy::SATURATE);
+ cast_rhs.configure(&rhs, &rhs_32, ConvertPolicy::SATURATE);
+ lhs_32.allocator()->allocate();
+ rhs_32.allocator()->allocate();
+ cast_lhs.run();
+ cast_rhs.run();
+
+ ReduceOperation lhs_sum_rows;
+ ReduceOperation rhs_sum_cols;
+
+ lhs_sum_rows.configure(&lhs_32, &vec_sum_rows, 0, ReductionOperation::SUM, false);
+ rhs_sum_cols.configure(&rhs_32, &vec_sum_cols, 1, ReductionOperation::SUM);
+
+ lhs_sum_rows.run();
+ rhs_sum_cols.run();
+
+ // Compute GEMM
+ ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+ reshape_rhs.run(reshape_rhs_pack);
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, { ACL_DST, &dst }, { ACL_VEC_COL_SUM, &vec_sum_cols }, { ACL_VEC_ROW_SUM, &vec_sum_rows } });
+ gemm.run(gemm_pack);
+ }
+
+ return dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, GEMMLowpOutputStageInfo output_stage,
+ const int a_offset, const int b_offset)
+ {
+ TensorShape dst_shape = lhs_shape;
+ dst_shape[0] = rhs_shape[0];
+ dst_shape[1] = lhs_shape[1];
+
+ // Create reference
+ SimpleTensor<T> lhs{ lhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, a_offset) };
+ SimpleTensor<T> rhs{ rhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, b_offset) };
+ SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 };
+ SimpleTensor<int32_t> dst{ dst_shape, DataType::S32, 1 };
+ SimpleTensor<T> dst_final{ dst_shape, data_type, 1 };
+
+ // Fill reference
+ fill(lhs, 0);
+ fill(rhs, 1);
+ fill(bias, 2);
+
+ dst = reference::gemmlowp_matrix_multiply_core<int32_t, T>(lhs, rhs, dst_shape, a_offset, b_offset);
+ dst_final = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, T>(dst, bias,
+ output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
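+        // Note (added, explanatory): the reference is computed in two steps, a raw S32 GEMMLowp
+        // core followed by the fixed-point requantization with bias, which together mirror the
+        // single fused pass performed by the target.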
+ return dst_final;
+ }
+
+ bool gemm_validated = true;
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType>
+class GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULValidationFixture : public framework::Fixture
+{
+public:
+ void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0,
+ unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, DataType data_type)
+ {
+ GEMMLHSMatrixInfo lhs_info;
+ lhs_info.m0 = m0;
+ lhs_info.k0 = k0;
+
+ GEMMRHSMatrixInfo rhs_info;
+ rhs_info.n0 = n0;
+ rhs_info.k0 = k0;
+ rhs_info.h0 = h0;
+ rhs_info.interleave = interleave_rhs;
+ rhs_info.transpose = transpose_rhs;
+
+ // Set the tensor shapes for LHS and RHS matrices
+ const TensorShape lhs_shape(k, m, batch_size);
+ const TensorShape rhs_shape(n, k, batch_size);
+
+ _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, data_type);
+ if(gemm_validated == true)
+ {
+ _reference = compute_reference(lhs_shape, rhs_shape, data_type);
+ }
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ switch(tensor.data_type())
+ {
+ case DataType::QASYMM8:
+ {
+ // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
+ std::uniform_int_distribution<> distribution(1, 254);
+ library->fill(tensor, distribution, i);
+ }
+ break;
+ case DataType::QASYMM8_SIGNED:
+ {
+ std::uniform_int_distribution<> distribution(-127, 126);
+ library->fill(tensor, distribution, i);
+ }
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported data type");
+ }
+ }
+
+ TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info, DataType data_type)
+ {
+ // Create tensors
+ TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
+ TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
+ TensorType rhs_reshaped;
+ TensorType dst;
+
+ const unsigned int M = lhs_shape[1];
+ const unsigned int N = rhs_shape[0];
+ const unsigned int K = lhs_shape[0];
+
+ GEMMKernelInfo gemm_info;
+ gemm_info.m = M;
+ gemm_info.n = N;
+ gemm_info.k = K;
+ gemm_info.lhs_info = lhs_info;
+ gemm_info.rhs_info = rhs_info;
+ // The output tensor will be auto-initialized within the function
+
+ // Create and configure function
+ ReshapeRHSOperatorType reshape_rhs;
+ GEMMFunctionType gemm;
+ reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+
+        // If validation fails, do not configure or run the function: validation checks whether
+        // the device supports this extension and, if it does not, the test is skipped.
+        // Running it regardless would only make the test fail, because target and
+        // reference would not match.
+ gemm_validated = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, nullptr, nullptr, nullptr));
+ if(gemm_validated == true)
+ {
+ gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, nullptr, nullptr, nullptr);
+
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+
+ // Allocate tensors
+ lhs.allocator()->allocate();
+ rhs.allocator()->allocate();
+ rhs_reshaped.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+
+ // Fill tensors
+ fill(AccessorType(lhs), 0);
+ fill(AccessorType(rhs), 1);
+
+ // Compute GEMM
+ ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+ reshape_rhs.run(reshape_rhs_pack);
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
+ }
+
+ return dst;
+ }
+
+ SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type)
+ {
+ TensorShape dst_shape = lhs_shape;
+ dst_shape[0] = rhs_shape[0];
+ dst_shape[1] = lhs_shape[1];
+
+ if(data_type == DataType::QASYMM8)
+ {
+ // Create reference
+ SimpleTensor<uint8_t> lhs{ lhs_shape, data_type, 1 };
+ SimpleTensor<uint8_t> rhs{ rhs_shape, data_type, 1 };
+ SimpleTensor<int32_t> dst{ dst_shape, DataType::S32, 1 };
+
+ // Fill reference
+ fill(lhs, 0);
+ fill(rhs, 1);
+
+ return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
+ }
+ else
+ {
+ // Create reference
+ SimpleTensor<int8_t> lhs{ lhs_shape, data_type, 1 };
+ SimpleTensor<int8_t> rhs{ rhs_shape, data_type, 1 };
+ SimpleTensor<int32_t> dst{ dst_shape, DataType::S32, 1 };
+
+ // Fill reference
+ fill(lhs, 0);
+ fill(rhs, 1);
+
+ return reference::gemmlowp_matrix_multiply_core<int32_t, int8_t>(lhs, rhs, dst_shape, 0, 0);
+ }
+ }
+
+ bool gemm_validated = true;
+ TensorType _target{};
+ SimpleTensor<int32_t> _reference{};
+};
+
template <typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType>
class GEMMLowpMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0,
unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, DataType data_type)
{
@@ -1397,7 +2011,7 @@ protected:
ReshapeRHSOperatorType reshape_rhs;
GEMMFunctionType gemm;
reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
- gemm.configure(&lhs, &rhs_reshaped, &dst, gemm_info);
+ gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info);
ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
@@ -1422,7 +2036,8 @@ protected:
// Compute GEMM
ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
reshape_rhs.run(reshape_rhs_pack);
- gemm.run();
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -1469,7 +2084,6 @@ template <typename TensorType, typename AccessorType, typename GEMMFunctionType>
class GEMMLowpMatrixMultiplyNativeValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0)
{
GEMMLHSMatrixInfo lhs_info;
@@ -1512,7 +2126,7 @@ protected:
// Create and configure function
GEMMFunctionType gemm;
- gemm.configure(&lhs, &rhs, &dst, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K));
+ gemm.configure(lhs.info(), rhs.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K));
ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
@@ -1533,7 +2147,8 @@ protected:
fill(AccessorType(rhs), 1);
// Compute GEMM
- gemm.run();
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -1563,7 +2178,6 @@ template <typename TensorType, typename AccessorType, typename GEMMFunctionType>
class GEMMLowpMatrixMultiplyNative3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0)
{
GEMMLHSMatrixInfo lhs_info;
@@ -1609,7 +2223,7 @@ protected:
// Create and configure function
GEMMFunctionType gemm;
- gemm.configure(&lhs, &rhs, &dst, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h));
+ gemm.configure(lhs.info(), rhs.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h));
ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
@@ -1630,7 +2244,8 @@ protected:
fill(AccessorType(rhs), 1);
// Compute GEMM
- gemm.run();
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -1660,4 +2275,4 @@ protected:
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H
diff --git a/tests/validation/fixtures/GEMMReshapeLHSMatrixFixture.h b/tests/validation/fixtures/GEMMReshapeLHSMatrixFixture.h
index a9d6c9b6aa..d88029f93e 100644
--- a/tests/validation/fixtures/GEMMReshapeLHSMatrixFixture.h
+++ b/tests/validation/fixtures/GEMMReshapeLHSMatrixFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,7 +50,6 @@ template <typename TensorType, typename AccessorType, typename OperatorType, typ
class GEMMReshapeLHSMatrixValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape_in, unsigned int batch_size, DataType data_type, unsigned int m0, unsigned int k0, unsigned int v0, bool interleave, bool transpose)
{
GEMMLHSMatrixInfo lhs_info;
diff --git a/tests/validation/fixtures/GEMMReshapeRHSMatrixFixture.h b/tests/validation/fixtures/GEMMReshapeRHSMatrixFixture.h
index cdb3ec3944..0929faf04a 100644
--- a/tests/validation/fixtures/GEMMReshapeRHSMatrixFixture.h
+++ b/tests/validation/fixtures/GEMMReshapeRHSMatrixFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,7 +50,6 @@ template <typename TensorType, typename AccessorType, typename OperatorType, typ
class GEMMReshapeRHSMatrixValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape_in, unsigned int batch_size, DataType data_type, unsigned int n0, unsigned int k0, unsigned int h0, bool interleave, bool transpose)
{
GEMMRHSMatrixInfo rhs_info;
diff --git a/tests/validation/fixtures/GEMMTranspose1xWFixture.h b/tests/validation/fixtures/GEMMTranspose1xWFixture.h
index 7caea1d3d8..3765515b57 100644
--- a/tests/validation/fixtures/GEMMTranspose1xWFixture.h
+++ b/tests/validation/fixtures/GEMMTranspose1xWFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class GEMMTranspose1xWValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(size_t x, size_t y, DataType data_type)
{
_data_type = data_type;
@@ -89,7 +88,7 @@ protected:
// Create and configure function
FunctionType f;
- f.configure(&a, &b);
+ f.configure(a.info(), b.info());
ARM_COMPUTE_ASSERT(a.info()->is_resizable());
ARM_COMPUTE_ASSERT(b.info()->is_resizable());
@@ -106,7 +105,8 @@ protected:
fill(AccessorType(b), 1);
// Compute GEMM function
- f.run();
+ ITensorPack tensors{ { ACL_SRC, &a }, { ACL_DST, &b } };
+ f.run(tensors);
return b;
}
diff --git a/tests/validation/fixtures/GatherFixture.h b/tests/validation/fixtures/GatherFixture.h
index 452a201f82..857b0387b7 100644
--- a/tests/validation/fixtures/GatherFixture.h
+++ b/tests/validation/fixtures/GatherFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,7 +47,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class GatherFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape indices_shape, int axis, DataType data_type)
{
_target = compute_target(input_shape, data_type, axis, indices_shape);
@@ -67,9 +66,11 @@ protected:
std::mt19937 gen(library->seed());
uint32_t *indices_ptr = static_cast<uint32_t *>(indices.data());
- std::uniform_int_distribution<uint32_t> dist_index(0, input_shape[actual_axis] - 1);
- //Let's consider 1D indices
- for(unsigned int ind = 0; ind < indices_shape[0]; ind++)
+ // 10% of the time the index is out-of-range.
+ uint32_t max_index = input_shape[actual_axis] + input_shape[actual_axis] / 9 + 1;
+ std::uniform_int_distribution<uint32_t> dist_index(0, max_index - 1);
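+        // Note (added, explanatory): max_index extends the valid range [0, dim) by roughly
+        // dim / 9 + 1 extra values, so about one in ten sampled indices falls outside the input
+        // extent.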
+
+ for(unsigned int ind = 0; ind < indices_shape.total_size(); ind++)
{
indices_ptr[ind] = dist_index(gen);
}
diff --git a/tests/validation/fixtures/Im2ColFixture.h b/tests/validation/fixtures/Im2ColFixture.h
index b1fbd76eb2..5c7978f4ab 100644
--- a/tests/validation/fixtures/Im2ColFixture.h
+++ b/tests/validation/fixtures/Im2ColFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,10 +45,9 @@ namespace validation
using namespace arm_compute::misc::shape_calculator;
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool batch_size_on_z>
-class Im2ColValidationFixture : public framework::Fixture
+class Im2ColOpValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, DataType data_type, const Size2D &kernel_dims, const PadStrideInfo &conv_info, const QuantizationInfo &quant_info, const DataLayout &data_layout,
unsigned int num_groups)
{
@@ -88,7 +87,7 @@ protected:
// Create and configure function
FunctionType im2col_func;
- im2col_func.configure(&src, &dst, _kernel_dims, _conv_info, _has_bias, Size2D(1U, 1U), _num_groups);
+ im2col_func.configure(src.info(), dst.info(), _kernel_dims, _conv_info, _has_bias, Size2D(1U, 1U), _num_groups);
ARM_COMPUTE_ASSERT(src.info()->is_resizable());
ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
@@ -103,8 +102,13 @@ protected:
// Fill tensors
fill(AccessorType(src));
+ arm_compute::ITensorPack pack =
+ {
+ { arm_compute::TensorType::ACL_SRC, &src },
+ { arm_compute::TensorType::ACL_DST, &dst }
+ };
// Compute function
- im2col_func.run();
+ im2col_func.run(pack);
return dst;
}
diff --git a/tests/validation/fixtures/IndirectConv2dAddressPrecalculationFixture.h b/tests/validation/fixtures/IndirectConv2dAddressPrecalculationFixture.h
new file mode 100644
index 0000000000..7374093f51
--- /dev/null
+++ b/tests/validation/fixtures/IndirectConv2dAddressPrecalculationFixture.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2022-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_FIXTURE
+#define ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "tests/Globals.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/IndirectConv2dAddressPrecalculation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using namespace arm_compute::misc::shape_calculator;
+
+template <typename TensorType, typename AccessorType, typename OperatorType>
+class IndirectConv2dAddressPrecalculationValidationFixture : public framework::Fixture
+{
+public:
+ void setup(unsigned int src_w,
+ unsigned int src_h,
+ unsigned int src_b,
+ unsigned int wei_w,
+ unsigned int wei_h,
+ unsigned int pad,
+ unsigned int stride,
+ unsigned int m0)
+ {
+ DirectConvComputeKernelInfo desc;
+ desc.m0 = m0;
+ desc.n0 = 1; // Not used by the kernel
+ desc.k0 = 1; // Not used by the kernel
+ desc.export_weights_to_cl_image = false; // Not used by the kernel
+
+ const PadStrideInfo conv_info(stride, stride, pad, pad);
+
+ const TensorShape shape_conv_src(23, // The input channels are not used by the kernel
+ src_w,
+ src_h,
+ src_b);
+
+ const TensorShape shape_conv_wei(23, // The input channels are not used by the kernel
+ wei_w,
+ wei_h,
+ 23 // The output channels are not used by the kernel
+ );
+
+        // The result of the kernel does not depend on the data type, so it can be fixed to F16 for validation purposes
+ const DataType data_type = DataType::F16;
+
+ _target = compute_target(shape_conv_src, shape_conv_wei, data_type, conv_info, desc);
+ _reference = compute_reference(shape_conv_src, shape_conv_wei, data_type, conv_info, desc);
+ }
+
+protected:
+ TensorType compute_target(TensorShape shape_conv_src, TensorShape shape_conv_wei, DataType data_type, const PadStrideInfo &conv_info, const DirectConvComputeKernelInfo &desc)
+ {
+ TensorInfo src_conv_info(shape_conv_src, 1, data_type, DataLayout::NHWC);
+ TensorInfo wei_conv_info(shape_conv_wei, 1, data_type, DataLayout::NHWC);
+ TensorType dst;
+
+ // The output tensor will be auto-initialized within the function
+
+ // Create and configure function
+ OperatorType func;
+ func.configure(&src_conv_info, &wei_conv_info, dst.info(), conv_info, desc);
+
+ add_padding_x({ &dst });
+
+ // Allocate tensors
+ dst.allocator()->allocate();
+
+        // Run the indirect conv2d address precalculation function
+ ITensorPack tensors = { { ACL_DST, &dst } };
+ func.run(tensors);
+
+ return dst;
+ }
+
+ SimpleTensor<int32_t> compute_reference(TensorShape shape_conv_src, TensorShape shape_conv_wei, DataType data_type, const PadStrideInfo &conv_info, const DirectConvComputeKernelInfo &desc)
+ {
+ ARM_COMPUTE_UNUSED(data_type);
+ TensorShape shape_out = compute_indirect_buffer_shape(shape_conv_src, DataLayout::NHWC, shape_conv_wei, conv_info, desc);
+ TensorShape output_conv_shape = compute_deep_convolution_shape(shape_conv_src, DataLayout::NHWC, shape_conv_wei, conv_info);
+
+ return reference::indirect_conv2d_addr_precalculation(shape_conv_src, shape_conv_wei, output_conv_shape, shape_out, conv_info);
+ }
+
+ TensorType _target{};
+ SimpleTensor<int32_t> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_FIXTURE */ \ No newline at end of file
diff --git a/tests/validation/fixtures/InstanceNormalizationLayerFixture.h b/tests/validation/fixtures/InstanceNormalizationLayerFixture.h
index 0f8c155277..c26dd99f02 100644
--- a/tests/validation/fixtures/InstanceNormalizationLayerFixture.h
+++ b/tests/validation/fixtures/InstanceNormalizationLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class InstanceNormalizationLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, DataLayout data_layout, bool in_place)
{
_target = compute_target(shape, data_type, data_layout, in_place);
diff --git a/tests/validation/fixtures/L2NormalizeLayerFixture.h b/tests/validation/fixtures/L2NormalizeLayerFixture.h
index 2dea66c98b..b8f4b1eaf3 100644
--- a/tests/validation/fixtures/L2NormalizeLayerFixture.h
+++ b/tests/validation/fixtures/L2NormalizeLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,7 +48,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class L2NormalizeLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, DataLayout data_layout, int axis, float epsilon)
{
_target = compute_target(shape, data_type, data_layout, axis, epsilon);
diff --git a/tests/validation/fixtures/LSTMLayerFixture.h b/tests/validation/fixtures/LSTMLayerFixture.h
index f4bae86d30..a32e9adfe5 100644
--- a/tests/validation/fixtures/LSTMLayerFixture.h
+++ b/tests/validation/fixtures/LSTMLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class LSTMLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape input_weights_shape, TensorShape recurrent_weights_shape, TensorShape cell_bias_shape, TensorShape output_cell_shape, TensorShape output_shape,
TensorShape scratch_shape, ActivationLayerInfo info, float cell_threshold, float projection_threshold, DataType data_type, bool projection_opt, bool peephole_opt,
bool use_layer_norm)
@@ -458,7 +457,6 @@ protected:
}
input_gate = reference::activation_layer(input_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
}
-
// Compute cell_state
SimpleTensor<T> fully_connected_cell_state = reference::fully_connected_layer(input, input_to_cell_w, cell_bias, output_cell_shape);
transposed_weights = reference::transpose(recurrent_to_cell_w);
@@ -474,12 +472,13 @@ protected:
fill(cell_bias, 8);
cell_state_out = reference::arithmetic_operation(reference::ArithmeticOperation::ADD, cell_state_out, cell_bias, data_type, ConvertPolicy::SATURATE);
}
- cell_state_out = reference::activation_layer(cell_state_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+ cell_state_out = reference::activation_layer(cell_state_out, info);
cell_state_out = reference::pixel_wise_multiplication<T, T, T>(cell_state_out, input_gate, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN, data_type);
cell_state_out = reference::arithmetic_operation(reference::ArithmeticOperation::ADD, cell_state_out, pixelwise_mul, data_type, ConvertPolicy::SATURATE);
+
if(cell_threshold != 0.f)
{
- cell_state_out = reference::activation_layer(cell_state_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, cell_threshold));
+ cell_state_out = reference::activation_layer(cell_state_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, cell_threshold, -cell_threshold));
}
// Compute output
@@ -515,7 +514,6 @@ protected:
output_state_out = reference::activation_layer(fully_connected_projection, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold));
}
}
-
std::vector<SimpleTensor<T>> scratch_inputs;
if(!cifg_opt)
{
diff --git a/tests/validation/fixtures/LogicalFixture.h b/tests/validation/fixtures/LogicalFixture.h
index c7dd1f6510..60dc963ba7 100644
--- a/tests/validation/fixtures/LogicalFixture.h
+++ b/tests/validation/fixtures/LogicalFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -79,7 +79,6 @@ class LogicalBinaryOperationValidationFixture : public LogicalOperationValidatio
using Parent = LogicalOperationValidationFixtureBase<TensorType, AccessorType, FunctionType, T>;
public:
- template <typename...>
void setup(TensorShape shape0, TensorShape shape1)
{
Parent::_target = compute_target(shape0, shape1);
@@ -135,7 +134,6 @@ class LogicalNotValidationFixture : public LogicalOperationValidationFixtureBase
using Parent = LogicalOperationValidationFixtureBase<TensorType, AccessorType, FunctionType, T>;
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type)
{
Parent::_target = compute_target(shape, data_type);
diff --git a/tests/validation/fixtures/MatMulFixture.h b/tests/validation/fixtures/MatMulFixture.h
new file mode 100644
index 0000000000..ffd12e56d0
--- /dev/null
+++ b/tests/validation/fixtures/MatMulFixture.h
@@ -0,0 +1,612 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_MATMULFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_MATMULFIXTURE_H
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+
+#include "src/core/utils/quantization/AsymmHelpers.h"
+#include "tests/framework/Asserts.h" // Required for ARM_COMPUTE_ASSERT
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/GEMM.h"
+#include "tests/validation/reference/GEMMLowp.h"
+#include "tests/validation/reference/Permute.h"
+#include "tests/validation/reference/ReshapeLayer.h"
+#include "tests/validation/Validation.h"
+
+#include <limits>
+#include <random>
+#include <type_traits>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T>
+class MatMulGenericValidationFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape shape_a,
+ TensorShape shape_b,
+ TensorShape output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ DataType data_type,
+ ActivationLayerInfo act_info,
+ int num_extra_runs,
+ Settings settings,
+ QuantizationInfo a_qinfo = QuantizationInfo(),
+ QuantizationInfo b_qinfo = QuantizationInfo(),
+ QuantizationInfo o_qinfo = QuantizationInfo())
+ {
+ // For brevity, the input shapes are assumed to be non-transposed for both the a and b matrices.
+ if (transpose_a)
+ {
+ permute(shape_a, PermutationVector(1U, 0U));
+ }
+ if (transpose_b)
+ {
+ permute(shape_b, PermutationVector(1U, 0U));
+ }
+
+ _target = compute_target(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info,
+ num_extra_runs, settings, a_qinfo, b_qinfo, o_qinfo);
+ _reference = compute_reference(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info,
+ a_qinfo, b_qinfo, o_qinfo);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f)
+ {
+ switch (tensor.data_type())
+ {
+ case DataType::BFLOAT16:
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<bfloat16> distribution{float(lo), float(hi)};
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::F16:
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{float(lo), float(hi)};
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::F32:
+ {
+ std::uniform_real_distribution<float> distribution(lo, hi);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ {
+ library->fill_tensor_uniform(tensor, i);
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Unsupported data type.");
+ }
+ }
+ }
+
+ virtual TensorType compute_target(const TensorShape &shape_a,
+ const TensorShape &shape_b,
+ const TensorShape &output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ DataType data_type,
+ ActivationLayerInfo act_info,
+ int num_extra_runs,
+ const Settings &settings,
+ QuantizationInfo a_qinfo,
+ QuantizationInfo b_qinfo,
+ QuantizationInfo o_qinfo)
+ {
+ // 1. Create Classes and configure function
+ // ----------------------------------------------------
+ // Create tensors
+ // Configure relevant classes and matmul function
+ TensorType a = create_tensor<TensorType>(shape_a, data_type, 1, a_qinfo);
+ TensorType b = create_tensor<TensorType>(shape_b, data_type, 1, b_qinfo);
+ TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, o_qinfo);
+
+ FunctionType matmul;
+
+ // Configure MatMulInfo class
+ MatMulInfo mm_info;
+ mm_info.adj_lhs(transpose_a).adj_rhs(transpose_b);
+
+ // Ensure values are dynamic
+ a.info()->set_are_values_constant(false);
+ b.info()->set_are_values_constant(false);
+
+ // Configure operator
+ matmul.configure(&a, &b, &dst, mm_info, settings, act_info);
+
+ // Assertions
+ ARM_COMPUTE_ASSERT(a.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(b.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+
+ // Allocate tensors
+ a.allocator()->allocate();
+ b.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+
+ // For multiple runs.
+ for (int i = 0; i < num_extra_runs; i++)
+ {
+ // Stress dynamic tensors by running multiple times.
+ // --------------------------------------------------------
+ // Fill tensors with new seed
+ // Run function
+ const int seed_offset = num_extra_runs * 100;
+ fill(AccessorType(a), seed_offset);
+ fill(AccessorType(b), seed_offset + 1);
+
+ matmul.run();
+ }
+
+ // 2. Final Run for reference comparison
+ // --------------------------------------------------------
+ // Re-fill tensors same seed as reference run
+ // Compute MatMul operation
+ fill(AccessorType(a), 2);
+ fill(AccessorType(b), 3);
+
+ matmul.run();
+
+ return dst;
+ }
+
+ template <typename TT>
+ typename std::enable_if < !std::is_integral<TT>::value, SimpleTensor<TT >>::type
+ compute_reference_gemm(const SimpleTensor<TT> &a,
+ const SimpleTensor<TT> &b,
+ const SimpleTensor<TT> &c,
+ float alpha,
+ float beta,
+ const QuantizationInfo &o_qinfo)
+ {
+ ARM_COMPUTE_UNUSED(o_qinfo);
+
+ return reference::gemm(a, b, c, alpha, beta);
+ }
+
+ template <typename TT>
+ typename std::enable_if<std::is_integral<TT>::value, SimpleTensor<TT>>::type
+ compute_reference_gemm(const SimpleTensor<TT> &a,
+ const SimpleTensor<TT> &b,
+ const SimpleTensor<TT> &c,
+ float alpha,
+ float beta,
+ const QuantizationInfo &o_qinfo)
+ {
+ ARM_COMPUTE_UNUSED(alpha, beta);
+
+ const auto aq = a.quantization_info().uniform();
+ const auto bq = b.quantization_info().uniform();
+ const auto oq = o_qinfo.uniform();
+
+ const auto multiplier = aq.scale * bq.scale / oq.scale;
+
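+ // Decompose the combined rescale factor (lhs_scale * rhs_scale / out_scale) into a fixed-point multiplier and shift for the quantize-down step below.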
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
+ std::vector<int32_t> output_multipliers{output_multiplier};
+ std::vector<int32_t> output_shifts{output_shift};
+
+ // The lhs and rhs offsets are negated here to keep the reference aligned with the function implementation, where the lhs and rhs offsets are also negated.
+ const auto tmp = reference::gemmlowp_matrix_multiply_core<int32_t>(a, b, c.shape(), -aq.offset, -bq.offset);
+
+ auto output = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, TT>(
+ tmp, output_multipliers, output_shifts, oq.offset, std::numeric_limits<int32_t>::lowest(),
+ std::numeric_limits<int32_t>::max());
+ output.quantization_info(o_qinfo);
+
+ return output;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &a_shape,
+ const TensorShape &b_shape,
+ const TensorShape &output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ DataType data_type,
+ ActivationLayerInfo act_info,
+ QuantizationInfo a_qinfo,
+ QuantizationInfo b_qinfo,
+ QuantizationInfo o_qinfo)
+ {
+ // We collapse dimensions > 2 onto dimension 2, i.e. 4D+ tensors will look like 3D
+ // This is necessary unless we choose to extend gemm reference for 4D+ tensors
+ TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ);
+ TensorShape a_shape_collapsed = a_shape.collapsed_from(Window::DimZ);
+ TensorShape b_shape_collapsed = b_shape.collapsed_from(Window::DimZ);
+
+ // Create reference
+ SimpleTensor<T> a{a_shape_collapsed, data_type, 1, a_qinfo};
+ SimpleTensor<T> b{b_shape_collapsed, data_type, 1, b_qinfo};
+ SimpleTensor<T> c{output_shape_collapsed, data_type, 1};
+
+ // Fill reference
+ fill(a, 2);
+ fill(b, 3);
+
+ /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N): if transpose_a is set to true, then A is assumed to be (B x K x M),
+ so A must be pre-transposed before being passed to the fixture. We then transpose A again inside the fixture to restore it to (B x M x K)
+ so that the reference implementation, which expects a (B x M x K) input, can be called.
+ Similarly, if transpose_b is set to true, then B is assumed to be (B x N x K) and must be pre-transposed before being passed to the fixture. */
+
+ // Define transposed shapes
+ TensorShape a_transposed_shape(a.shape());
+ a_transposed_shape.set(0, a.shape().y());
+ a_transposed_shape.set(1, a.shape().x());
+
+ TensorShape b_transposed_shape(b.shape());
+ b_transposed_shape.set(0, b.shape().y());
+ b_transposed_shape.set(1, b.shape().x());
+
+ // Define transposed tensors
+ SimpleTensor<T> a_transposed{a_transposed_shape, data_type};
+ SimpleTensor<T> b_transposed{b_transposed_shape, data_type};
+
+ // pretranspose a if necessary
+ if (transpose_a)
+ {
+ a_transposed = reference::permute<T>(a, PermutationVector(1U, 0U));
+ }
+ // pretranspose b if necessary
+ if (transpose_b)
+ {
+ b_transposed = reference::permute<T>(b, PermutationVector(1U, 0U));
+ }
+
+ // Setting beta to 0 will effectively disable C for the
+ // computation of the reference: alpha * A * B + 0 * C
+ // Use transposed tensors if boolean enabled else use original tensors
+ auto result = compute_reference_gemm<T>((transpose_a) ? a_transposed : a, (transpose_b) ? b_transposed : b, c,
+ 1.0f, 0.f, o_qinfo);
+
+ result = reference::activation_layer<T>(result, act_info, o_qinfo);
+
+ // We reshape the gemm output back if the tensor is high dimensional
+ if (output_shape_collapsed != output_shape)
+ {
+ result = reference::reshape_layer(result, output_shape);
+ }
+
+ return result;
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+
+/// TODO: (ONCPUML-1451) The current state of this fixture is interim and a longer-term testing method will be implemented later.
+/// @note: Currently we support only a 2x2 test due to the lack of a reference reorder implementation.
+template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T>
+class MatMulFixedFormatFixture
+ : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>
+{
+public:
+ TensorType compute_target(const TensorShape &shape_a,
+ const TensorShape &shape_b,
+ const TensorShape &output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ DataType data_type,
+ ActivationLayerInfo act_info,
+ int num_extra_runs,
+ const Settings &settings,
+ QuantizationInfo a_qinfo,
+ QuantizationInfo b_qinfo,
+ QuantizationInfo o_qinfo) override
+ {
+ // 1. Create Classes and configure function
+ // ----------------------------------------------------
+ // Create tensors
+ // Configure relevant classes and matmul function
+ TensorType a = create_tensor<TensorType>(shape_a, data_type, 1, a_qinfo);
+ TensorType b = create_tensor<TensorType>(shape_b, data_type, 1, b_qinfo);
+ TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, o_qinfo);
+
+ const auto weight_tensor_info = TensorInfo(*b.info());
+ const TensorInfo new_tensor_info = prepare_weights(weight_tensor_info);
+ TensorType weights_transformed = create_tensor<TensorType>(new_tensor_info);
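+
+ // Note: weights_transformed uses the TensorInfo rewritten by prepare_weights() below, which describes the fixed-format (strided) weight layout consumed by the operator.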
+
+ // Configure MatMulInfo class
+ MatMulInfo mm_info;
+ mm_info.adj_lhs(transpose_a).adj_rhs(transpose_b);
+
+ // Ensure values are dynamic
+ a.info()->set_are_values_constant(false);
+ b.info()->set_are_values_constant(false);
+ weights_transformed.info()->set_are_values_constant(false);
+
+ FunctionType matmul;
+
+ // Configure operator
+ matmul.configure(&a, &weights_transformed, &dst, mm_info, settings, act_info);
+
+ // Assertions
+ ARM_COMPUTE_ASSERT(a.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(b.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(weights_transformed.info()->is_resizable());
+
+ // Allocate tensors
+ a.allocator()->allocate();
+ b.allocator()->allocate();
+ dst.allocator()->allocate();
+ weights_transformed.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!weights_transformed.info()->is_resizable());
+
+ // For multiple runs.
+ for (int i = 0; i < num_extra_runs; i++)
+ {
+ // Stress dynamic tensors by running multiple times.
+ // --------------------------------------------------------
+ // Fill tensors with new seed
+ // Run function
+ const int seed_offset = num_extra_runs * 100;
+ this->fill(AccessorType(a), seed_offset);
+ this->fill(AccessorType(b), seed_offset + 1);
+
+ matmul.run();
+ }
+
+ // 2. Final Run for reference comparison
+ // --------------------------------------------------------
+ // Re-fill tensors same seed as reference run
+ // Compute MatMul operation
+ this->fill(AccessorType(a), 2);
+ this->fill(AccessorType(b), 3);
+
+ rearrange_data(AccessorType(b), AccessorType(weights_transformed));
+
+ matmul.run();
+
+ return dst;
+ }
+
+ void setup(TensorShape shape_a,
+ TensorShape shape_b,
+ TensorShape output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ DataType data_type,
+ ActivationLayerInfo act_info,
+ int num_extra_runs,
+ Settings settings,
+ QuantizationInfo a_qinfo,
+ QuantizationInfo b_qinfo,
+ QuantizationInfo o_qinfo)
+ {
+ if (CPUInfo::get().has_bf16())
+ {
+ MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup(
+ shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, settings,
+ a_qinfo, b_qinfo, o_qinfo);
+ }
+ }
+
+private:
+ TensorInfo prepare_weights(const TensorInfo tensor_info)
+ {
+ const DataLayout data_layout = tensor_info.data_layout();
+ ARM_COMPUTE_EXPECT(data_layout == DataLayout::NCHW, framework::LogLevel::ERRORS);
+ const DataType data_type = tensor_info.data_type();
+ const TensorShape tensor_shape = tensor_info.tensor_shape();
+ const int H = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)];
+ const int W = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)];
+ ARM_COMPUTE_EXPECT(H <= 2 && W <= 2, framework::LogLevel::ERRORS);
+
+ arm_compute::Strides strides_in_bytes = tensor_info.strides_in_bytes();
+ strides_in_bytes.set(1, 32);
+ strides_in_bytes.set(2, 32);
+
+ const size_t offset_first_element_in_bytes = tensor_info.offset_first_element_in_bytes();
+ const size_t total_size_in_bytes = 32;
+
+ const TensorShape TS(H, W);
+
+ TensorInfo new_tensor_info = tensor_info;
+ new_tensor_info.init(TS, tensor_info.num_channels(), data_type, strides_in_bytes, offset_first_element_in_bytes,
+ total_size_in_bytes);
+
+ return new_tensor_info;
+ }
+
+ void rearrange_data(const AccessorType src, AccessorType dst)
+ {
+ const TensorShape src_tensor_shape = src.shape();
+ const DataLayout data_layout = src.data_layout();
+ ARM_COMPUTE_EXPECT(data_layout == DataLayout::NCHW, framework::LogLevel::ERRORS);
+ const unsigned int O =
+ src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES)]; // N=O
+ const unsigned int H =
+ src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)];
+ const unsigned int W =
+ src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)];
+ const unsigned int I =
+ src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)]; // C=I
+ ARM_COMPUTE_EXPECT(H <= 2 && W <= 2, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(I == 1 && O == 1, framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(src.num_elements() <= dst.num_elements(), framework::LogLevel::ERRORS);
+
+ const T *src_ptr = reinterpret_cast<const T *>(src.data());
+ T *dst_ptr = reinterpret_cast<T *>(dst.data());
+
+ // rearrange indexes for 2x2 input and weight
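+ // Element i of the 2x2 source buffer is written to offset dst_idx[i] of the transformed buffer (0 -> 0, 1 -> 4, 2 -> 1, 3 -> 5).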
+ int dst_idx[] = {0, 4, 1, 5};
+ for (int i = 0; i < 4; i++)
+ {
+ dst_ptr[dst_idx[i]] = src_ptr[i];
+ }
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T>
+class MatMulValidationFixture
+ : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>
+{
+public:
+ void setup(TensorShape shape_a,
+ TensorShape shape_b,
+ TensorShape output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ DataType data_type)
+ {
+ MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup(
+ shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, ActivationLayerInfo(), 0, Settings());
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T>
+class MatMulValidationWithDynamicTensorsFixture
+ : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>
+{
+public:
+ void setup(TensorShape shape_a,
+ TensorShape shape_b,
+ TensorShape output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ DataType data_type,
+ ActivationLayerInfo act_info,
+ int num_extra_runs)
+ {
+ MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup(
+ shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, Settings());
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T>
+class QuantizedMatMulValidationFixture
+ : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>
+{
+public:
+ void setup(TensorShape shape_a,
+ TensorShape shape_b,
+ TensorShape output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ DataType data_type,
+ ActivationLayerInfo act_info,
+ int num_extra_runs,
+ QuantizationInfo a_qinfo,
+ QuantizationInfo b_qinfo,
+ QuantizationInfo o_qinfo)
+ {
+ MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup(
+ shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, Settings(),
+ a_qinfo, b_qinfo, o_qinfo);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T>
+class MatMulValidationWithActivationFixture
+ : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>
+{
+public:
+ void setup(TensorShape shape_a,
+ TensorShape shape_b,
+ TensorShape output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ DataType data_type,
+ ActivationLayerInfo act_info)
+ {
+ MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup(
+ shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, 0, Settings());
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T>
+class MatMulValidationWithActivationAlphaBetaFixture
+ : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>
+{
+public:
+ void setup(TensorShape shape_a,
+ TensorShape shape_b,
+ TensorShape output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ DataType data_type,
+ ActivationLayerInfo::ActivationFunction function,
+ float alpha_beta)
+ {
+ ActivationLayerInfo act_info(function, alpha_beta, alpha_beta);
+ MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup(
+ shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, 0, Settings());
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T>
+class QuantizedMatMulValidationWithActivationFixture
+ : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>
+{
+public:
+ void setup(TensorShape shape_a,
+ TensorShape shape_b,
+ TensorShape output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ DataType data_type,
+ ActivationLayerInfo::ActivationFunction function,
+ float alpha_beta,
+ int num_extra_runs,
+ QuantizationInfo a_qinfo,
+ QuantizationInfo b_qinfo,
+ QuantizationInfo o_qinfo)
+ {
+ ActivationLayerInfo act_info(function, alpha_beta, alpha_beta);
+ MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup(
+ shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, Settings(),
+ a_qinfo, b_qinfo, o_qinfo);
+ }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_MATMULFIXTURE_H
diff --git a/tests/validation/fixtures/MatMulKernelFixture.h b/tests/validation/fixtures/MatMulKernelFixture.h
new file mode 100644
index 0000000000..26072dff65
--- /dev/null
+++ b/tests/validation/fixtures/MatMulKernelFixture.h
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/CL/Helper.h"
+#include "tests/framework/Asserts.h" // Required for ARM_COMPUTE_ASSERT
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/reference/GEMM.h"
+#include "tests/validation/reference/GEMMLowp.h"
+#include "tests/validation/reference/Permute.h"
+#include "tests/validation/reference/ReshapeLayer.h"
+#include <cmath>
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using namespace arm_compute::opencl::kernels;
+
+template <typename T, typename KernelType, bool use_mmul = false>
+class MatMulKernelGenericValidationFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type,
+ bool enable_bias)
+ {
+ // This hash is used by the random generators. Hash collisions are possible but
+ // intentional, as they are an easy way to make the current random generation
+ // process differ across many test configurations, which previously used the
+ // same set of values.
+ _hash = M0 + N0 + K0 + shape_a[0] + shape_a[1] + shape_b[0] + shape_b[1] + enable_bias + export_rhs_to_cl_image;
+
+ // Flag to create a bias
+ _enable_bias = enable_bias;
+
+ // For brevity, the input shapes are assumed to be non-transposed for both the Lhs and Rhs matrices.
+ QuantizationInfo lhs_q_info;
+ QuantizationInfo rhs_q_info;
+ QuantizationInfo dst_q_info;
+
+ if(is_data_type_quantized(data_type))
+ {
+ const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+ const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+ std::mt19937 generator(library->seed() + _hash);
+ std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f);
+ std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+ const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+
+ const int32_t offset_lhs = distribution_t(generator);
+ const int32_t offset_rhs = distribution_t(generator);
+
+ lhs_q_info = QuantizationInfo(scale_lhs, offset_lhs);
+ rhs_q_info = QuantizationInfo(scale_rhs, offset_rhs);
+
+ const int m = shape_a.y();
+ const int n = shape_b.x();
+ const int k = shape_a.x();
+
+ const float bias_fraction = enable_bias ? 0.5f : 0.f;
+
+ QuantizationHint q_hint = suggest_matmul_dst_q_info_and_bias(lhs_q_info, rhs_q_info, m, n, k, data_type, bias_fraction);
+ dst_q_info = q_hint.q_info;
+ _min_bias = q_hint.bias_min;
+ _max_bias = q_hint.bias_max;
+ }
+
+ if(pretranspose_a)
+ {
+ permute(shape_a, PermutationVector(1U, 0U));
+ }
+
+ if(pretranspose_b)
+ {
+ permute(shape_b, PermutationVector(1U, 0U));
+ }
+
+ // Skip configurations unsupported by the device.
+ _device_supports_export_to_cl_image = image2d_from_buffer_supported(CLKernelLibrary::get().get_device());
+ if(!_device_supports_export_to_cl_image && export_rhs_to_cl_image)
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ return; // Note: Also need to skip the validate in corresponding FIXTURE_DATA_TEST_CASEs.
+ }
+
+ _device_supports_mmul = arm_matrix_multiply_supported(CLKernelLibrary::get().get_device());
+ if(!_device_supports_mmul && use_mmul)
+ {
+ ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ return; // Note: Also need to skip the validate in corresponding FIXTURE_DATA_TEST_CASEs.
+ }
+
+ _target = compute_target(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, M0, N0, K0, export_rhs_to_cl_image, data_type, lhs_q_info, rhs_q_info, dst_q_info);
+ _reference = compute_reference(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, data_type, lhs_q_info, rhs_q_info, dst_q_info);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f)
+ {
+ switch(tensor.data_type())
+ {
+ case DataType::F16:
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ float(lo), float(hi) };
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::F32:
+ {
+ std::uniform_real_distribution<float> distribution(lo, hi);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ default:
+ library->fill_tensor_uniform(tensor, i);
+ }
+ }
+
+ template <typename U>
+ void fill_bias_s32(U &&tensor, int i, int32_t min, int32_t max)
+ {
+ std::uniform_int_distribution<int32_t> distribution(min, max);
+ library->fill(tensor, distribution, i);
+ }
+
+ template <typename U, typename D>
+ void fill_constant(U &&tensor, D value)
+ {
+ library->fill_tensor_value(tensor, value);
+ }
+
+ CLTensor compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, bool pretranspose_a, bool pretranspose_b, const int M0, const int N0, const int K0,
+ bool export_rhs_to_cl_image, DataType data_type, const QuantizationInfo &lhs_q_info, const QuantizationInfo &rhs_q_info, const QuantizationInfo &dst_q_info)
+ {
+ CLSynthetizeOperator<KernelType> matMul{};
+ MatMulKernelInfo matmul_info;
+ matmul_info.adj_lhs = pretranspose_a;
+ matmul_info.adj_rhs = pretranspose_b;
+ matmul_info.m0 = M0;
+ matmul_info.n0 = N0;
+ matmul_info.k0 = K0;
+ matmul_info.export_rhs_to_cl_image = export_rhs_to_cl_image;
+
+ bool is_quantized = is_data_type_quantized(data_type);
+
+ // Create tensors
+ CLTensor a = create_tensor<CLTensor>(shape_a, data_type, 1, lhs_q_info);
+ CLTensor b = create_tensor<CLTensor>(shape_b, data_type, 1, rhs_q_info);
+ CLTensor bias = create_tensor<CLTensor>(output_shape[0], (is_quantized) ? DataType::S32 : data_type, 1, dst_q_info);
+ CLTensor dst = create_tensor<CLTensor>(output_shape, data_type, 1, dst_q_info);
+
+ matMul.configure(a.info(), b.info(), (_enable_bias) ? bias.info() : nullptr, dst.info(), matmul_info);
+ ARM_COMPUTE_ASSERT(a.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(b.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+
+ // Allocate tensors
+ a.allocator()->allocate();
+ b.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+
+ // Fill tensors
+ fill(CLAccessor(a), _hash + 1);
+ fill(CLAccessor(b), _hash + 2);
+
+ // Compute matMul kernel
+ ITensorPack tensors_pack({ { ACL_SRC_0, &a },
+ { ACL_SRC_1, &b },
+ { ACL_DST, &dst }
+ });
+
+ if(_enable_bias)
+ {
+ // Allocate, fill and add bias to TensorPack obj
+ bias.allocator()->allocate();
+ if(is_quantized)
+ {
+ fill_bias_s32(CLAccessor(bias), _hash + 3, _min_bias, _max_bias);
+ }
+ else
+ {
+ fill(CLAccessor(bias), _hash + 3);
+ }
+ tensors_pack.add_tensor(ACL_SRC_2, &bias);
+ }
+
+ matMul.run(tensors_pack);
+
+ return dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, bool pretranspose_a, bool pretranspose_b, DataType data_type,
+ const QuantizationInfo &lhs_q_info, const QuantizationInfo &rhs_q_info, const QuantizationInfo &dst_q_info)
+ {
+ // We collapse dimensions > 3 onto dimension 3, i.e. 5D+ tensors will look like 4D
+ // This is necessary unless we choose to extend gemm reference for 5D+ tensors
+ TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ);
+ TensorShape shape_a_collapsed = shape_a.collapsed_from(Window::DimZ);
+ TensorShape shape_b_collapsed = shape_b.collapsed_from(Window::DimZ);
+
+ // Create reference
+ SimpleTensor<T> a{ shape_a_collapsed, data_type, 1, lhs_q_info };
+ SimpleTensor<T> b{ shape_b_collapsed, data_type, 1, rhs_q_info };
+ SimpleTensor<T> c{ output_shape_collapsed, data_type, 1, dst_q_info };
+
+ // Fill reference
+ fill(a, _hash + 1);
+ fill(b, _hash + 2);
+
+ /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N): if pretranspose_a is set to true, then A is assumed to be (B x K x M),
+ so A must be pre-transposed before being passed to the fixture. We then transpose A again inside the fixture to restore it to (B x M x K)
+ so that the reference implementation, which expects a (B x M x K) input, can be called.
+ Similarly, if pretranspose_b is set to true, then B is assumed to be (B x N x K) and must be pre-transposed before being passed to the fixture. */
+
+ // Define transposed shapes
+ TensorShape a_transposed_shape(a.shape());
+ a_transposed_shape.set(0, a.shape().y());
+ a_transposed_shape.set(1, a.shape().x());
+
+ TensorShape b_transposed_shape(b.shape());
+ b_transposed_shape.set(0, b.shape().y());
+ b_transposed_shape.set(1, b.shape().x());
+
+ // Define transposed tensors
+ SimpleTensor<T> a_transposed{ a_transposed_shape, data_type };
+ SimpleTensor<T> b_transposed{ b_transposed_shape, data_type };
+
+ // pretranspose a if necessary
+ if(pretranspose_a)
+ {
+ a_transposed = reference::permute<T>(a, PermutationVector(1U, 0U));
+ }
+
+ // pretranspose b if necessary
+ if(pretranspose_b)
+ {
+ b_transposed = reference::permute<T>(b, PermutationVector(1U, 0U));
+ }
+
+ // Use transposed tensors if boolean enabled else use original tensors
+ SimpleTensor<T> result = gemm_reference<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c);
+
+ // We reshape the gemm output back if the tensor is high dimensional
+ if(output_shape_collapsed != output_shape)
+ {
+ result = reference::reshape_layer(result, output_shape);
+ }
+
+ return result;
+ }
+
+ template <typename U = T>
+ typename std::enable_if < std::is_same<U, float>::value || std::is_same<U, half>::value, SimpleTensor<U >>::type gemm_reference(SimpleTensor<U> &a, SimpleTensor<U> &b, SimpleTensor<U> &c)
+ {
+ // Fill bias, then copy first dimension into subsequent dimensions to mimic broadcast
+ // of bias tensor from shape [dst.dimension(0)] to [dst.tensor_shape()] in target kernel
+ if(_enable_bias)
+ {
+ fill(c, _hash + 3);
+ const int n = c.shape().x();
+ const int other_dims = c.shape().collapsed_from(1)[1];
+ for(int i = 1; i < other_dims; ++i) // For all data, copy first n elements into remaining batches
+ {
+ memcpy(c.data() + i * n, c.data(), n * sizeof(T));
+ }
+ }
+ // Setting beta to 0 will effectively disable C for the
+ // computation of the reference: alpha * A * B + 0 * C
+ return reference::gemm<U>(a, b, c, 1.0f, (_enable_bias) ? 1.0f : 0.f);
+ }
+
+ template <typename U = T>
+ typename std::enable_if < std::is_same<U, int8_t>::value || std::is_same<U, uint8_t>::value, SimpleTensor<U >>::type gemm_reference(SimpleTensor<U> &a, SimpleTensor<U> &b, SimpleTensor<U> &c)
+ {
+ const UniformQuantizationInfo aq = a.quantization_info().uniform();
+ const UniformQuantizationInfo bq = b.quantization_info().uniform();
+ const UniformQuantizationInfo cq = c.quantization_info().uniform();
+
+ const SimpleTensor<int32_t> result = reference::gemmlowp_matrix_multiply_core<int32_t, U, U>(a, b, c.shape(), -aq.offset, -bq.offset);
+
+ std::vector<int32_t> gemmlowp_multipliers{ 1 };
+ std::vector<int32_t> gemmlowp_shifts{ 1 };
+ const int gemmlowp_offset = cq.offset;
+ const float scale = aq.scale * bq.scale / cq.scale;
+
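+ // Convert the combined rescale factor into the fixed-point multiplier/shift pair consumed by the quantize-down reference below.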
+ quantization::calculate_quantized_multiplier(scale, &gemmlowp_multipliers[0], &gemmlowp_shifts[0]);
+ constexpr int32_t gemmlowp_min_bound = std::numeric_limits<int32_t>::min();
+ constexpr int32_t gemmlowp_max_bound = std::numeric_limits<int32_t>::max();
+
+ SimpleTensor<int> bias{ c.shape(), DataType::S32 };
+ if(_enable_bias)
+ {
+ // Identical to the float implementation: fill the first dimension of the bias and copy its values across the remaining batches
+ fill_bias_s32(bias, _hash + 3, _min_bias, _max_bias);
+ const int n = bias.shape().x();
+ const int other_dims = bias.shape().collapsed_from(1)[1];
+ const unsigned int dt_size = sizeof(int32_t);
+ for(int i = 1; i < other_dims; ++i)
+ {
+ memcpy(bias.data() + i * n, bias.data(), n * dt_size);
+ }
+ }
+ else
+ {
+ fill_constant(bias, static_cast<int32_t>(0)); // effectively disable bias
+ }
+
+ const SimpleTensor<U> final_result = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, U>(result, bias,
+ gemmlowp_multipliers, gemmlowp_shifts, gemmlowp_offset, gemmlowp_min_bound, gemmlowp_max_bound);
+
+ return final_result;
+ }
+
+ CLTensor _target{};
+ SimpleTensor<T> _reference{};
+ bool _enable_bias{ false };
+ bool _device_supports_export_to_cl_image{ true };
+ bool _device_supports_mmul{ true };
+ int32_t _min_bias{ 0 };
+ int32_t _max_bias{ 0 };
+ int32_t _hash{ 0 };
+};
+
+template <typename T, typename KernelType, bool use_mmul = false>
+class MatMulKernelValidationFixture : public MatMulKernelGenericValidationFixture<T, KernelType, use_mmul>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type)
+ {
+ MatMulKernelGenericValidationFixture<T, KernelType, use_mmul>::setup(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, M0, N0, K0, export_rhs_to_cl_image, data_type,
+ false /* enable bias */);
+ }
+};
+
+template <typename T, typename KernelType, bool use_mmul = false>
+class MatMulKernelWithBiasValidation : public MatMulKernelGenericValidationFixture<T, KernelType, use_mmul>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type)
+ {
+ MatMulKernelGenericValidationFixture<T, KernelType, use_mmul>::setup(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, M0, N0, K0, export_rhs_to_cl_image, data_type,
+ true /* enable bias */);
+ }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE_H
diff --git a/tests/validation/fixtures/MaxUnpoolingLayerFixture.h b/tests/validation/fixtures/MaxUnpoolingLayerFixture.h
index b2575aa26b..808e3ffabd 100644
--- a/tests/validation/fixtures/MaxUnpoolingLayerFixture.h
+++ b/tests/validation/fixtures/MaxUnpoolingLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename PoolingFunctionTy
class MaxUnpoolingLayerValidationGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, DataLayout data_layout)
{
std::mt19937 gen(library->seed());
@@ -150,7 +149,6 @@ template <typename TensorType, typename AccessorType, typename F1, typename F2,
class MaxUnpoolingLayerValidationFixture : public MaxUnpoolingLayerValidationGenericFixture<TensorType, AccessorType, F1, F2, T>
{
public:
- template <typename...>
void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, DataType data_type, DataLayout data_layout)
{
MaxUnpoolingLayerValidationGenericFixture<TensorType, AccessorType, F1, F2, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, true),
diff --git a/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h b/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h
index 9868cd1abf..bf5d20790c 100644
--- a/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h
+++ b/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,30 +44,35 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class MeanStdDevNormalizationLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
- void setup(TensorShape shape, DataType dt, bool in_place, float epsilon = 1e-8f)
+ void setup(TensorShape shape, DataType dt, bool in_place, float epsilon = 1e-8)
{
- _data_type = dt;
- _target = compute_target(shape, dt, in_place, epsilon);
- _reference = compute_reference(shape, dt, epsilon);
+ QuantizationInfo qi = QuantizationInfo(0.5f, 10);
+ _data_type = dt;
+ _target = compute_target(shape, dt, in_place, epsilon, qi);
+ _reference = compute_reference(shape, dt, epsilon, qi);
}
protected:
template <typename U>
- void fill(U &&src_tensor)
+ void fill(U &&tensor)
{
- static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
- using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
-
- DistributionType distribution{ T(-1.0f), T(1.0f) };
- library->fill(src_tensor, distribution, 0);
+ if(is_data_type_float(_data_type))
+ {
+ std::uniform_real_distribution<> distribution{ -1.0f, 1.0f };
+ library->fill(tensor, distribution, 0);
+ }
+ else
+ {
+ std::uniform_int_distribution<> distribution{ 0, 255 };
+ library->fill(tensor, distribution, 0);
+ }
}
- TensorType compute_target(TensorShape shape, DataType dt, bool in_place, float epsilon)
+ TensorType compute_target(TensorShape shape, DataType dt, bool in_place, float epsilon, QuantizationInfo qi)
{
// Create tensors
- TensorType src = create_tensor<TensorType>(shape, dt, 1);
- TensorType dst;
+ TensorType src = create_tensor<TensorType>(shape, dt, 1, qi);
+ TensorType dst = create_tensor<TensorType>(shape, dt, 1, qi);
TensorType *dst_ptr = in_place ? &src : &dst;
@@ -104,10 +109,10 @@ protected:
}
}
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType dt, float epsilon)
+ SimpleTensor<T> compute_reference(const TensorShape &shape, DataType dt, float epsilon, QuantizationInfo qi)
{
// Create reference
- SimpleTensor<T> ref_src{ shape, dt, 1 };
+ SimpleTensor<T> ref_src{ shape, dt, 1, qi };
// Fill reference
fill(ref_src);
@@ -119,6 +124,7 @@ protected:
SimpleTensor<T> _reference{};
DataType _data_type{};
};
+
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/fixtures/NonMaxSuppressionFixture.h b/tests/validation/fixtures/NonMaxSuppressionFixture.h
index abfe909fc4..043b4731aa 100644
--- a/tests/validation/fixtures/NonMaxSuppressionFixture.h
+++ b/tests/validation/fixtures/NonMaxSuppressionFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType>
class NMSValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, unsigned int max_output_size, float score_threshold, float nms_threshold)
{
ARM_COMPUTE_ERROR_ON(max_output_size == 0);
diff --git a/tests/validation/fixtures/NormalizationLayerFixture.h b/tests/validation/fixtures/NormalizationLayerFixture.h
index 8a29e64826..ddaa3533f5 100644
--- a/tests/validation/fixtures/NormalizationLayerFixture.h
+++ b/tests/validation/fixtures/NormalizationLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class NormalizationValidationGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, NormType norm_type, int norm_size, float beta, bool is_scaled, DataType data_type, DataLayout data_layout)
{
NormalizationLayerInfo info(norm_type, norm_size, 5, beta, 1.f, is_scaled);
@@ -119,7 +118,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class NormalizationValidationFixture : public NormalizationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, NormType norm_type, int norm_size, float beta, bool is_scaled, DataType data_type, DataLayout data_layout)
{
NormalizationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, norm_type, norm_size, beta, is_scaled, data_type, data_layout);
diff --git a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h
index 4586afbb5d..5f2c865950 100644
--- a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h
+++ b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class NormalizePlanarYUVLayerValidationGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape0, TensorShape shape1, DataType dt, DataLayout data_layout, QuantizationInfo quantization_info)
{
_data_type = dt;
@@ -144,7 +143,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class NormalizePlanarYUVLayerValidationFixture : public NormalizePlanarYUVLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape0, TensorShape shape1, DataType dt, DataLayout data_layout)
{
NormalizePlanarYUVLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, dt, data_layout, QuantizationInfo());
@@ -155,7 +153,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class NormalizePlanarYUVLayerValidationQuantizedFixture : public NormalizePlanarYUVLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape0, TensorShape shape1, DataType dt, DataLayout data_layout, QuantizationInfo quantization_info)
{
NormalizePlanarYUVLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, dt, data_layout, quantization_info);
diff --git a/tests/validation/fixtures/PadLayerFixture.h b/tests/validation/fixtures/PadLayerFixture.h
index 1951813df8..93b43616ff 100644
--- a/tests/validation/fixtures/PadLayerFixture.h
+++ b/tests/validation/fixtures/PadLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PaddingFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, const PaddingList &padding, const PaddingMode mode)
{
PaddingList clamped_padding = padding;
diff --git a/tests/validation/fixtures/PermuteFixture.h b/tests/validation/fixtures/PermuteFixture.h
index 118971394a..b1b3845a8d 100644
--- a/tests/validation/fixtures/PermuteFixture.h
+++ b/tests/validation/fixtures/PermuteFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PermuteValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, PermutationVector perm, DataType data_type)
{
_target = compute_target(input_shape, data_type, perm);
diff --git a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
index 7c643bd726..4345d8a13f 100644
--- a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
+++ b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PixelWiseMultiplicationGenericValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(const TensorShape &shape0,
const TensorShape &shape1,
DataType dt_in1,
@@ -76,9 +75,29 @@ protected:
QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info)
{
// Create tensors
- TensorType src1 = create_tensor<TensorType>(shape0, dt_in1, 1, qinfo0);
- TensorType src2 = create_tensor<TensorType>(shape1, dt_in2, 1, qinfo1);
- TensorType dst = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), dt_out, 1, qinfo_out);
+ const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+ TensorType src1 = create_tensor<TensorType>(shape0, dt_in1, 1, qinfo0);
+ TensorType src2 = create_tensor<TensorType>(shape1, dt_in2, 1, qinfo1);
+ TensorType dst = create_tensor<TensorType>(out_shape, dt_out, 1, qinfo_out);
+
+ // Check whether to do in-place computation and whether the inputs are broadcast compatible
+ TensorType *actual_dst = &dst;
+ if(_is_inplace)
+ {
+ bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (dt_in1 == dt_out);
+ bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (dt_in2 == dt_out);
+ bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace);
+ ARM_COMPUTE_ASSERT(do_in_place);
+
+ if(src1_is_inplace)
+ {
+ actual_dst = &src1;
+ }
+ else
+ {
+ actual_dst = &src2;
+ }
+ }
auto allocate_tensor = [](TensorType & t)
{
@@ -89,11 +108,12 @@ protected:
// Create and configure function
FunctionType multiply;
- multiply.configure(&src1, &src2, (_is_inplace ? &src1 : &dst), scale, convert_policy, rounding_policy, act_info);
+ multiply.configure(&src1, &src2, actual_dst, scale, convert_policy, rounding_policy, act_info);
allocate_tensor(src1);
allocate_tensor(src2);
+ // If not computing in-place, we still need to allocate the original dst
if(!_is_inplace)
{
allocate_tensor(dst);
@@ -106,12 +126,7 @@ protected:
// Compute function
multiply.run();
- if(_is_inplace)
- {
- return src1;
- }
-
- return dst;
+ return std::move(*actual_dst);
}
SimpleTensor<T3> compute_reference(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, DataType dt_in2, DataType dt_out,
@@ -122,16 +137,12 @@ protected:
SimpleTensor<T1> src1{ shape0, dt_in1, 1, qinfo0 };
SimpleTensor<T2> src2{ shape1, dt_in2, 1, qinfo1 };
- // current in-place implementation only supports same metadata of input and output tensors.
- // By ignoring output quantization information here, we can make test cases implementation much simpler.
- QuantizationInfo output_qinfo = _is_inplace ? qinfo0 : qinfo_out;
-
// Fill reference
fill(src1, 0);
fill(src2, 1);
- auto result = reference::pixel_wise_multiplication<T1, T2, T3>(src1, src2, scale, convert_policy, rounding_policy, dt_out, output_qinfo);
- return act_info.enabled() ? reference::activation_layer(result, act_info, output_qinfo) : result;
+ auto result = reference::pixel_wise_multiplication<T1, T2, T3>(src1, src2, scale, convert_policy, rounding_policy, dt_out, qinfo_out);
+ return act_info.enabled() ? reference::activation_layer(result, act_info, qinfo_out) : result;
}
TensorType _target{};
@@ -143,7 +154,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PixelWiseMultiplicationValidationFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2, T3>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType dt_in1, DataType dt_in2, DataType dt_out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, bool is_inplace)
{
PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2, T3>::setup(shape, shape, dt_in1, dt_in2, dt_out, scale, convert_policy, rounding_policy,
@@ -155,7 +165,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PixelWiseMultiplicationBroadcastValidationFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2, T3>
{
public:
- template <typename...>
void setup(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, DataType dt_in2, DataType dt_out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy,
bool is_inplace)
{
@@ -168,7 +177,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PixelWiseMultiplicationValidationFloatFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, ActivationLayerInfo act_info, bool is_inplace)
{
PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, shape, dt_in1, dt_in2, dt_in2, scale, convert_policy, rounding_policy,
@@ -180,7 +188,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PixelWiseMultiplicationValidationIntegerFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, ActivationLayerInfo act_info, bool is_inplace)
{
PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, shape, dt_in1, dt_in2, dt_in2, scale, convert_policy, rounding_policy,
@@ -192,7 +199,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PixelWiseMultiplicationBroadcastValidationFloatFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>
{
public:
- template <typename...>
void setup(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy,
ActivationLayerInfo act_info, bool is_inplace)
{
@@ -205,7 +211,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PixelWiseMultiplicationValidationQuantizedFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2, T3>
{
public:
- template <typename...>
void setup(const TensorShape &shape, DataType dt_in1, DataType dt_in2, DataType dt_out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy,
QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
{
@@ -218,7 +223,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PixelWiseMultiplicationBroadcastValidationQuantizedFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2, T3>
{
public:
- template <typename...>
void setup(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, DataType dt_in2, DataType dt_out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy,
QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
{
diff --git a/tests/validation/fixtures/Pooling3dLayerFixture.h b/tests/validation/fixtures/Pooling3dLayerFixture.h
new file mode 100644
index 0000000000..1bdf615fb1
--- /dev/null
+++ b/tests/validation/fixtures/Pooling3dLayerFixture.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2022-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_POOLING_3D_LAYER_FIXTURE
+#define ARM_COMPUTE_TEST_POOLING_3D_LAYER_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/Pooling3dLayer.h"
+#include <random>
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class Pooling3dLayerValidationGenericFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape shape, Pooling3dLayerInfo pool_info, DataType data_type, QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo())
+ {
+ _target = compute_target(shape, pool_info, data_type, input_qinfo, output_qinfo);
+ _reference = compute_reference(shape, pool_info, data_type, input_qinfo, output_qinfo);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor)
+ {
+ if(tensor.data_type() == DataType::F32)
+ {
+ std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, 0);
+ }
+ else if(tensor.data_type() == DataType::F16)
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
+ library->fill(tensor, distribution, 0);
+ }
+ else // data type is quantized_asymmetric
+ {
+ library->fill_tensor_uniform(tensor, 0);
+ }
+ }
+
+ TensorType compute_target(TensorShape shape, Pooling3dLayerInfo info,
+ DataType data_type, QuantizationInfo input_qinfo, QuantizationInfo output_qinfo)
+ {
+ // Create tensors
+ TensorType src = create_tensor<TensorType>(shape, data_type, 1, input_qinfo, DataLayout::NDHWC);
+ const TensorShape dst_shape = misc::shape_calculator::compute_pool3d_shape((src.info()->tensor_shape()), info);
+ TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1, output_qinfo, DataLayout::NDHWC);
+
+ // Create and configure function
+ FunctionType pool_layer;
+ pool_layer.validate(src.info(), dst.info(), info);
+ pool_layer.configure(&src, &dst, info);
+
+ ARM_COMPUTE_ASSERT(src.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!src.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+
+ // Fill tensors
+ fill(AccessorType(src));
+
+ // Compute function
+ pool_layer.run();
+ return dst;
+ }
+
+ SimpleTensor<T> compute_reference(TensorShape shape, Pooling3dLayerInfo info, DataType data_type, QuantizationInfo input_qinfo, QuantizationInfo output_qinfo)
+ {
+ // Create reference
+ SimpleTensor<T> src(shape, data_type, 1, input_qinfo, DataLayout::NDHWC);
+ // Fill reference
+ fill(src);
+ return reference::pooling_3d_layer<T>(src, info, output_qinfo);
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class Pooling3dLayerValidationFixture : public Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape shape, PoolingType pool_type, Size3D pool_size, Size3D stride, Padding3D padding, bool exclude_padding, DataType data_type)
+ {
+ Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, Pooling3dLayerInfo(pool_type, pool_size, stride, padding, exclude_padding),
+ data_type);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class Pooling3dLayerValidationQuantizedFixture : public Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape shape, PoolingType pool_type, Size3D pool_size, Size3D stride, Padding3D padding, bool exclude_padding, DataType data_type,
+ QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo())
+ {
+ Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, Pooling3dLayerInfo(pool_type, pool_size, stride, padding, exclude_padding),
+ data_type, input_qinfo, output_qinfo);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class Pooling3dLayerGlobalValidationFixture : public Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape shape, PoolingType pool_type, DataType data_type)
+ {
+ Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, Pooling3dLayerInfo(pool_type), data_type);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class SpecialPooling3dLayerValidationFixture : public Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape src_shape, Pooling3dLayerInfo pool_info, DataType data_type)
+ {
+ Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, pool_info, data_type);
+ }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_POOLING_3D_LAYER_FIXTURE */
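For orientation, a minimal sketch of how a CL test suite would typically bind one of these new fixtures; CLPooling3dLayer, CLAccessor, the dataset values and the tolerance are illustrative assumptions rather than part of this patch:

template <typename T>
using CLPooling3dGlobalFixture = Pooling3dLayerGlobalValidationFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>;

FIXTURE_DATA_TEST_CASE(RunGlobal, CLPooling3dGlobalFixture<float>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("Shape", TensorShape(27U, 13U, 4U, 3U, 2U)),
                               framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // _target and _reference are filled in by the fixture's setup(); only the comparison is left to do here.
    validate(CLAccessor(_target), _reference, RelativeTolerance<float>(0.01f));
}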
diff --git a/tests/validation/fixtures/PoolingLayerFixture.h b/tests/validation/fixtures/PoolingLayerFixture.h
index ec4e9f80dd..59c920868b 100644
--- a/tests/validation/fixtures/PoolingLayerFixture.h
+++ b/tests/validation/fixtures/PoolingLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PoolingLayerValidationGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, DataLayout data_layout, bool indices = false,
QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo(), bool mixed_layout = false)
{
@@ -161,10 +160,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PoolingLayerIndicesValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
- void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout)
+ void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout, bool use_kernel_indices)
{
- PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, exclude_padding),
+ PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, exclude_padding, false,
+ true, use_kernel_indices),
data_type, data_layout, true);
}
};
@@ -173,7 +172,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PoolingLayerValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout)
{
PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, exclude_padding),
@@ -185,7 +183,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PoolingLayerValidationMixedPrecisionFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout, bool fp_mixed_precision = false)
{
PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, exclude_padding, fp_mixed_precision),
@@ -197,7 +194,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PoolingLayerValidationQuantizedFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout = DataLayout::NCHW,
QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo())
{
@@ -210,10 +206,9 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class SpecialPoolingLayerValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape src_shape, PoolingLayerInfo pool_info, DataType data_type)
{
- PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, pool_info, data_type, DataLayout::NCHW);
+ PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, pool_info, data_type, pool_info.data_layout);
}
};
@@ -221,7 +216,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class GlobalPoolingLayerValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, PoolingType pool_type, DataType data_type, DataLayout data_layout = DataLayout::NCHW)
{
PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, data_layout), data_type, data_layout);
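In the indices fixture above, the two literals inserted before use_kernel_indices map (assuming the current PoolingLayerInfo constructor ordering) to fp_mixed_precision = false and use_inf_as_limit = true; only the new use_kernel_indices flag is driven by the test dataset. A hedged sketch of a caller, with NEPoolingLayer and all parameter values as illustrative assumptions:

template <typename T>
using NEPoolIndicesFixture = PoolingLayerIndicesValidationFixture<Tensor, Accessor, NEPoolingLayer, T>;

// setup(shape, pool_type, pool_size, pad_stride_info, exclude_padding, data_type, data_layout, use_kernel_indices)
// e.g. setup(TensorShape(27U, 13U, 2U), PoolingType::MAX, Size2D(2, 2),
//            PadStrideInfo(1, 1, 0, 0), true, DataType::F32, DataLayout::NHWC, true);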
diff --git a/tests/validation/fixtures/PriorBoxLayerFixture.h b/tests/validation/fixtures/PriorBoxLayerFixture.h
index 40a9f5d976..0a76cfd155 100644
--- a/tests/validation/fixtures/PriorBoxLayerFixture.h
+++ b/tests/validation/fixtures/PriorBoxLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class PriorBoxLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, PriorBoxLayerInfo info, DataType data_type, DataLayout data_layout)
{
TensorInfo input_info(input_shape, 1, data_type);
diff --git a/tests/validation/fixtures/QLSTMLayerNormalizationFixture.h b/tests/validation/fixtures/QLSTMLayerNormalizationFixture.h
index b496e4abad..e864b4affe 100644
--- a/tests/validation/fixtures/QLSTMLayerNormalizationFixture.h
+++ b/tests/validation/fixtures/QLSTMLayerNormalizationFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class QLSTMLayerNormalizationValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weight_shape, TensorShape bias_shape, DataType data_type, QuantizationInfo weight_qinfo)
{
ARM_COMPUTE_ERROR_ON(data_type != DataType::QSYMM16);
diff --git a/tests/validation/fixtures/QuantizationLayerFixture.h b/tests/validation/fixtures/QuantizationLayerFixture.h
index 8e8b920a9a..1b21967bda 100644
--- a/tests/validation/fixtures/QuantizationLayerFixture.h
+++ b/tests/validation/fixtures/QuantizationLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class QuantizationValidationGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type_in, DataType data_type_out, QuantizationInfo qinfo, QuantizationInfo qinfo_in)
{
_target = compute_target(shape, data_type_in, data_type_out, qinfo, qinfo_in);
@@ -108,7 +107,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class QuantizationValidationFixture : public QuantizationValidationGenericFixture<TensorType, AccessorType, FunctionType, Tin, Tout>
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type_in, DataType data_type_out, QuantizationInfo qinfo)
{
QuantizationValidationGenericFixture<TensorType, AccessorType, FunctionType, Tin, Tout>::setup(shape, data_type_in, data_type_out, qinfo, QuantizationInfo());
diff --git a/tests/validation/fixtures/RNNLayerFixture.h b/tests/validation/fixtures/RNNLayerFixture.h
index 983cd43398..e9a05e7838 100644
--- a/tests/validation/fixtures/RNNLayerFixture.h
+++ b/tests/validation/fixtures/RNNLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,7 +42,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class RNNLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape recurrent_weights_shape, TensorShape bias_shape, TensorShape output_shape, ActivationLayerInfo info,
DataType data_type)
{
diff --git a/tests/validation/fixtures/ROIAlignLayerFixture.h b/tests/validation/fixtures/ROIAlignLayerFixture.h
index 4c349bcd47..ad76dcbbd9 100644
--- a/tests/validation/fixtures/ROIAlignLayerFixture.h
+++ b/tests/validation/fixtures/ROIAlignLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ROIAlignLayerGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, DataLayout data_layout, QuantizationInfo qinfo, QuantizationInfo output_qinfo)
{
_rois_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::QASYMM16 : data_type;
@@ -189,7 +188,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ROIAlignLayerFixture : public ROIAlignLayerGenericFixture<TensorType, AccessorType, FunctionType, T, TRois>
{
public:
- template <typename...>
void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, DataLayout data_layout)
{
ROIAlignLayerGenericFixture<TensorType, AccessorType, FunctionType, T, TRois>::setup(input_shape, pool_info, rois_shape, data_type, data_layout,
@@ -201,7 +199,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ROIAlignLayerQuantizedFixture : public ROIAlignLayerGenericFixture<TensorType, AccessorType, FunctionType, T, TRois>
{
public:
- template <typename...>
void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type,
DataLayout data_layout, QuantizationInfo qinfo, QuantizationInfo output_qinfo)
{
diff --git a/tests/validation/fixtures/ROIPoolingLayerFixture.h b/tests/validation/fixtures/ROIPoolingLayerFixture.h
index 763c5a7d76..4b46a6176d 100644
--- a/tests/validation/fixtures/ROIPoolingLayerFixture.h
+++ b/tests/validation/fixtures/ROIPoolingLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ROIPoolingLayerGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, DataLayout data_layout, QuantizationInfo qinfo, QuantizationInfo output_qinfo)
{
_target = compute_target(input_shape, data_type, data_layout, pool_info, rois_shape, qinfo, output_qinfo);
@@ -174,7 +173,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ROIPoolingLayerQuantizedFixture : public ROIPoolingLayerGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type,
DataLayout data_layout, QuantizationInfo qinfo, QuantizationInfo output_qinfo)
{
@@ -187,7 +185,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ROIPoolingLayerFixture : public ROIPoolingLayerGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, DataLayout data_layout)
{
ROIPoolingLayerGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, pool_info, rois_shape, data_type, data_layout,
diff --git a/tests/validation/fixtures/RangeFixture.h b/tests/validation/fixtures/RangeFixture.h
index 9265e299b6..166613a318 100644
--- a/tests/validation/fixtures/RangeFixture.h
+++ b/tests/validation/fixtures/RangeFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,7 +55,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class RangeFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(const DataType data_type0, float start, float step, const QuantizationInfo qinfo0 = QuantizationInfo())
{
_target = compute_target(data_type0, qinfo0, start, step);
diff --git a/tests/validation/fixtures/ReduceMeanFixture.h b/tests/validation/fixtures/ReduceMeanFixture.h
index 304630e9f5..e61941435c 100644
--- a/tests/validation/fixtures/ReduceMeanFixture.h
+++ b/tests/validation/fixtures/ReduceMeanFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,7 +47,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ReduceMeanValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info_input, QuantizationInfo quantization_info_output)
{
_target = compute_target(shape, data_type, axis, keep_dims, quantization_info_input, quantization_info_output);
@@ -124,7 +123,13 @@ protected:
{
TensorShape output_shape = i == 0 ? src_shape : out.shape();
output_shape.set(axis[i], 1);
- out = reference::reduction_operation<T, T>(i == 0 ? src : out, output_shape, axis[i], ReductionOperation::MEAN_SUM, quantization_info_output);
+ bool is_opencl = false;
+
+#ifdef ARM_COMPUTE_OPENCL_ENABLED
+ is_opencl = std::is_same<CLTensor, TensorType>::value; // Round toward zero on OpenCL to match the kernel
+#endif /* ARM_COMPUTE_OPENCL_ENABLED */
+ out = reference::reduction_operation<T, T>(i == 0 ? src : out, output_shape, axis[i], ReductionOperation::MEAN_SUM, data_type, quantization_info_output,
+ is_opencl ? RoundingPolicy::TO_ZERO : RoundingPolicy::TO_NEAREST_UP);
}
if(!keep_dims)
@@ -133,7 +138,7 @@ protected:
std::sort(axis.begin(), axis.begin() + axis.num_dimensions());
for(unsigned int i = 0; i < axis.num_dimensions(); ++i)
{
- output_shape.remove_dimension(axis[i] - i);
+ output_shape.remove_dimension(axis[i] - i, false);
}
out = reference::reshape_layer(out, output_shape);
@@ -149,7 +154,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ReduceMeanQuantizedFixture : public ReduceMeanValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info_input, QuantizationInfo quantization_info_output)
{
ReduceMeanValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, keep_dims, quantization_info_input, quantization_info_output);
@@ -160,7 +164,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ReduceMeanFixture : public ReduceMeanValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims)
{
ReduceMeanValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, keep_dims, QuantizationInfo(), QuantizationInfo());
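The rounding-policy switch in the hunk above keeps the reference in step with the backend kernels: the OpenCL reduction kernel truncates the quantized mean toward zero, while the reference used for the other backends rounds to nearest. A small worked illustration with hypothetical values:

// sum = 7 over 2 elements, exact mean = 3.5
// RoundingPolicy::TO_ZERO       -> 3  (selected when TensorType is CLTensor, matching the CL kernel)
// RoundingPolicy::TO_NEAREST_UP -> 4  (selected for the other backends)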
diff --git a/tests/validation/fixtures/ReductionOperationFixture.h b/tests/validation/fixtures/ReductionOperationFixture.h
index fc422ad35b..b44f299486 100644
--- a/tests/validation/fixtures/ReductionOperationFixture.h
+++ b/tests/validation/fixtures/ReductionOperationFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ReductionOperationValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info, bool keep_dims = false)
{
const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN);
@@ -76,14 +75,14 @@ protected:
if(tensor.data_type() == DataType::QASYMM8)
{
std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f);
- std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second);
+ std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second);
library->fill(tensor, distribution, 0);
}
else if(tensor.data_type() == DataType::QASYMM8_SIGNED)
{
std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f);
- std::uniform_int_distribution<int8_t> distribution(bounds.first, bounds.second);
+ std::uniform_int_distribution<int32_t> distribution(bounds.first, bounds.second);
library->fill(tensor, distribution, 0);
}
@@ -135,7 +134,7 @@ protected:
// Fill reference
fill(src);
- return reference::reduction_operation<T, T>(src, dst_shape, axis, op, quantization_info);
+ return reference::reduction_operation<T, T>(src, dst_shape, axis, op, data_type, quantization_info);
}
TensorType _target{};
@@ -149,7 +148,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ReductionOperationQuantizedFixture : public ReductionOperationValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info = QuantizationInfo(), bool keep_dims = false)
{
ReductionOperationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, op, quantization_info, keep_dims);
@@ -160,7 +158,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ReductionOperationFixture : public ReductionOperationValidationFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op, bool keep_dims = false)
{
ReductionOperationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, op, QuantizationInfo(), keep_dims);
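The distribution changes above are not cosmetic: std::uniform_int_distribution is only specified for short, int, long, long long and their unsigned counterparts, so instantiating it with uint8_t or int8_t is undefined behaviour and is rejected outright by some standard-library implementations. The usual fix, sketched below, is to draw with a 32-bit type and narrow afterwards:

#include <cstdint>
#include <random>

int8_t draw_int8(std::mt19937 &gen)
{
    // 8-bit types are not valid IntType arguments, so draw as int32_t and narrow explicitly.
    std::uniform_int_distribution<int32_t> dist(-128, 127);
    return static_cast<int8_t>(dist(gen));
}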
diff --git a/tests/validation/fixtures/RemapFixture.h b/tests/validation/fixtures/RemapFixture.h
deleted file mode 100644
index 2cb8e67f62..0000000000
--- a/tests/validation/fixtures/RemapFixture.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_REMAP_FIXTURE
-#define ARM_COMPUTE_TEST_REMAP_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Remap.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class RemapValidationGenericFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, DataLayout data_layout = DataLayout::NCHW)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> distribution(0, 255);
- const T constant_border_value = static_cast<T>(distribution(gen));
-
- _data_layout = data_layout;
- _target = compute_target(shape, policy, data_type, border_mode, constant_border_value);
- _reference = compute_reference(shape, policy, data_type, border_mode, constant_border_value);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor, int i, float min, float max)
- {
- std::uniform_int_distribution<> distribution((int)min, (int)max);
- library->fill(tensor, distribution, i);
- }
-
- TensorType compute_target(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value)
- {
- if(_data_layout == DataLayout::NHWC)
- {
- permute(shape, PermutationVector(2U, 0U, 1U));
- }
-
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type, 1, QuantizationInfo(), _data_layout);
- TensorType map_x = create_tensor<TensorType>(shape, DataType::F32, 1, QuantizationInfo(), _data_layout);
- TensorType map_y = create_tensor<TensorType>(shape, DataType::F32, 1, QuantizationInfo(), _data_layout);
- TensorType dst = create_tensor<TensorType>(shape, data_type, 1, QuantizationInfo(), _data_layout);
-
- // Create and configure function
- FunctionType remap;
- remap.configure(&src, &map_x, &map_y, &dst, policy, border_mode, constant_border_value);
-
- ARM_COMPUTE_ASSERT(src.info()->is_resizable());
- ARM_COMPUTE_ASSERT(map_x.info()->is_resizable());
- ARM_COMPUTE_ASSERT(map_y.info()->is_resizable());
- ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
-
- // Allocate tensors
- src.allocator()->allocate();
- map_x.allocator()->allocate();
- map_y.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_ASSERT(!src.info()->is_resizable());
- ARM_COMPUTE_ASSERT(!map_x.info()->is_resizable());
- ARM_COMPUTE_ASSERT(!map_y.info()->is_resizable());
- ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
-
- // Fill tensors
- int max_val = std::max({ shape.x(), shape.y(), shape.z() });
-
- fill(AccessorType(src), 0, 0, 255);
- fill(AccessorType(map_x), 1, -5, max_val);
- fill(AccessorType(map_y), 2, -5, max_val);
-
- // Compute function
- remap.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value)
- {
- ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
-
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
- SimpleTensor<float> map_x{ shape, DataType::F32 };
- SimpleTensor<float> map_y{ shape, DataType::F32 };
-
- // Create the valid mask Tensor
- _valid_mask = SimpleTensor<T> { shape, data_type };
-
- // Fill reference
- int max_val = std::max({ shape.x(), shape.y(), shape.z() });
-
- fill(src, 0, 0, 255);
- fill(map_x, 1, -5, max_val);
- fill(map_y, 2, -5, max_val);
-
- // Compute reference
- return reference::remap<T>(src, map_x, map_y, _valid_mask, policy, border_mode, constant_border_value);
- }
-
- TensorType _target{};
- SimpleTensor<T> _reference{};
- SimpleTensor<T> _valid_mask{};
- DataLayout _data_layout{};
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class RemapValidationFixture : public RemapValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
-{
-public:
- template <typename...>
- void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode)
- {
- RemapValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, policy, data_type, border_mode);
- }
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class RemapValidationMixedLayoutFixture : public RemapValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
-{
-public:
- template <typename...>
- void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, DataLayout data_layout)
- {
- RemapValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, policy, data_type, border_mode, data_layout);
- }
-};
-
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_REMAP_FIXTURE */
diff --git a/tests/validation/fixtures/ReorderFixture.h b/tests/validation/fixtures/ReorderFixture.h
new file mode 100644
index 0000000000..8e28484c48
--- /dev/null
+++ b/tests/validation/fixtures/ReorderFixture.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE_H
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/Reorder.h"
+#include "src/core/NEON/kernels/arm_gemm/utils.hpp"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** [ReorderLayer fixture] **/
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ReorderValidationFixture : public framework::Fixture
+{
+public:
+ void check_hardware_supports(WeightFormat output_wf){
+ if(!Scheduler::get().cpu_info().has_sve() && output_wf!=WeightFormat::OHWIo4){
+ _hardware_supports = false;
+ }
+ if (Scheduler::get().cpu_info().has_sve() && arm_gemm::utils::get_vector_length<float>() != 8 && output_wf==WeightFormat::OHWIo8)
+ {
+ _hardware_supports = false;
+ }
+ }
+
+ void setup(TensorShape input_shape, TensorShape output_shape, WeightFormat input_wf, WeightFormat output_wf, DataType data_type)
+ {
+ check_hardware_supports(output_wf);
+ if (_hardware_supports){
+ _target = compute_target(input_shape, output_shape, input_wf, output_wf, data_type);
+ _reference = compute_reference(input_shape, output_shape, output_wf, data_type);
+ }
+ }
+
+ protected:
+ template <typename U>
+ void fill(U &&tensor)
+ {
+ library->fill_tensor_uniform(tensor, 0);
+ }
+
+ TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, WeightFormat input_wf, WeightFormat output_wf, DataType data_type)
+ {
+ // Create tensors
+ TensorType src = create_tensor<TensorType>(input_shape, data_type);
+ TensorType dst = create_tensor<TensorType>(output_shape, data_type);
+
+ // Create and configure function
+ FunctionType reorder;
+
+ reorder.configure(&src, &dst, input_wf, output_wf);
+
+ ARM_COMPUTE_ASSERT(src.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!src.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+
+ // Fill tensors
+ fill(AccessorType(src));
+
+ // Compute function
+ reorder.run();
+
+ return dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, WeightFormat output_wf, DataType data_type)
+ {
+ // Create reference
+ SimpleTensor<T> src{ input_shape, data_type };
+
+ // Fill reference
+ fill(src);
+
+ return reference::reorder_layer<T>(src, output_shape, output_wf);
+ }
+
+ bool _hardware_supports = true;
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+/** [ReorderLayer fixture] **/
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE_H
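A sketch of how the reorder fixture would typically be bound on the CPU backend; NEReorderLayer, the shapes and the weight formats are illustrative assumptions. Note that setup() skips the whole run when check_hardware_supports() decides the requested output format cannot be produced on the current machine (for example OHWIo8 without a 256-bit SVE vector length), so the test body guards on _hardware_supports:

template <typename T>
using NEReorderFixture = ReorderValidationFixture<Tensor, Accessor, NEReorderLayer, T>;

FIXTURE_DATA_TEST_CASE(RunSmall, NEReorderFixture<float>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("InputShape", TensorShape(10U, 9U)),
                               framework::dataset::make("OutputShape", TensorShape(12U, 9U)),
                               framework::dataset::make("InputWeightFormat", WeightFormat::OHWI),
                               framework::dataset::make("OutputWeightFormat", WeightFormat::OHWIo4),
                               framework::dataset::make("DataType", DataType::F32)))
{
    if(_hardware_supports)
    {
        validate(Accessor(_target), _reference);
    }
}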
diff --git a/tests/validation/fixtures/ReorgLayerFixture.h b/tests/validation/fixtures/ReorgLayerFixture.h
index 76a486e3d7..f87017190e 100644
--- a/tests/validation/fixtures/ReorgLayerFixture.h
+++ b/tests/validation/fixtures/ReorgLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ReorgLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, int32_t stride, DataType data_type, DataLayout data_layout)
{
_target = compute_target(input_shape, stride, data_type, data_layout);
diff --git a/tests/validation/fixtures/ReshapeLayerFixture.h b/tests/validation/fixtures/ReshapeLayerFixture.h
index 8a98379ef2..5be431f8cf 100644
--- a/tests/validation/fixtures/ReshapeLayerFixture.h
+++ b/tests/validation/fixtures/ReshapeLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_RESHAPE_LAYER_FIXTURE
-#define ARM_COMPUTE_TEST_RESHAPE_LAYER_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_RESHAPELAYERFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_RESHAPELAYERFIXTURE_H
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
@@ -31,6 +31,7 @@
#include "tests/IAccessor.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
#include "tests/validation/reference/ReshapeLayer.h"
namespace arm_compute
@@ -41,13 +42,12 @@ namespace validation
{
/** [ReshapeLayer fixture] **/
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ReshapeLayerValidationFixture : public framework::Fixture
+class ReshapeLayerGenericValidationFixture : public framework::Fixture
{
public:
- template <typename...>
- void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type)
+ void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type, bool add_x_padding = false)
{
- _target = compute_target(input_shape, output_shape, data_type);
+ _target = compute_target(input_shape, output_shape, data_type, add_x_padding);
_reference = compute_reference(input_shape, output_shape, data_type);
}
@@ -58,7 +58,7 @@ protected:
library->fill_tensor_uniform(tensor, i);
}
- TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, DataType data_type)
+ TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, DataType data_type, bool add_x_padding = false)
{
// Check that the input shape can indeed be reshaped to the output one
ARM_COMPUTE_ASSERT(input_shape.total_size() == output_shape.total_size());
@@ -75,6 +75,12 @@ protected:
ARM_COMPUTE_ASSERT(src.info()->is_resizable());
ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+ if(add_x_padding)
+ {
+ // Add random padding in x dimension
+ add_padding_x({ &src, &dst });
+ }
+
// Allocate tensors
src.allocator()->allocate();
dst.allocator()->allocate();
@@ -105,8 +111,27 @@ protected:
TensorType _target{};
SimpleTensor<T> _reference{};
};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ReshapeLayerValidationFixture : public ReshapeLayerGenericValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type)
+ {
+ ReshapeLayerGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, output_shape, data_type);
+ }
+};
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ReshapeLayerPaddedValidationFixture : public ReshapeLayerGenericValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type)
+ {
+ ReshapeLayerGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, output_shape, data_type, true /* add_x_padding */);
+ }
+};
/** [ReshapeLayer fixture] **/
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_RESHAPE_LAYER_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_RESHAPELAYERFIXTURE_H
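The new padded variant only flips add_x_padding, which makes the generic fixture call add_padding_x() (from tests/validation/Helpers.h) on src and dst before allocation, so the reshape kernels are also exercised with non-contiguous rows. A minimal, illustrative binding, with CLReshapeLayer assumed for the example:

template <typename T>
using CLReshapePaddedFixture = ReshapeLayerPaddedValidationFixture<CLTensor, CLAccessor, CLReshapeLayer, T>;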
diff --git a/tests/validation/fixtures/ReverseFixture.h b/tests/validation/fixtures/ReverseFixture.h
index 9d047a0067..856bff7b12 100644
--- a/tests/validation/fixtures/ReverseFixture.h
+++ b/tests/validation/fixtures/ReverseFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_REVERSE_FIXTURE
-#define ARM_COMPUTE_TEST_REVERSE_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_REVERSEFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_REVERSEFIXTURE_H
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorShape.h"
@@ -45,11 +45,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ReverseValidationFixture : public framework::Fixture
{
public:
- template <typename...>
- void setup(TensorShape shape, TensorShape axis_shape, DataType data_type)
+ void setup(TensorShape shape, TensorShape axis_shape, DataType data_type, bool use_negative_axis = false, bool use_inverted_axis = false)
{
- _target = compute_target(shape, axis_shape, data_type);
- _reference = compute_reference(shape, axis_shape, data_type);
+ _num_dims = shape.num_dimensions();
+ _target = compute_target(shape, axis_shape, data_type, use_negative_axis, use_inverted_axis);
+ _reference = compute_reference(shape, axis_shape, data_type, use_negative_axis, use_inverted_axis);
}
protected:
@@ -58,16 +58,25 @@ protected:
{
library->fill_tensor_uniform(tensor, 0);
}
- std::vector<int> generate_random_axis()
+ std::vector<int32_t> generate_random_axis(bool use_negative = false)
{
- std::vector<int> axis_v = { 0, 1, 2, 3 };
- std::mt19937 g(0);
+ std::vector<int32_t> axis_v;
+ if(use_negative)
+ {
+ axis_v = { -1, -2, -3, -4 };
+ }
+ else
+ {
+ axis_v = { 0, 1, 2, 3 };
+ }
+ axis_v = std::vector<int32_t>(axis_v.begin(), axis_v.begin() + _num_dims);
+ std::mt19937 g(library->seed());
std::shuffle(axis_v.begin(), axis_v.end(), g);
return axis_v;
}
- TensorType compute_target(const TensorShape &shape, const TensorShape &axis_shape, DataType data_type)
+ TensorType compute_target(const TensorShape &shape, const TensorShape &axis_shape, DataType data_type, bool use_negative_axis, bool use_inverted_axis = false)
{
// Create tensors
TensorType src = create_tensor<TensorType>(shape, data_type, 1);
@@ -76,7 +85,7 @@ protected:
// Create and configure function
FunctionType reverse_func;
- reverse_func.configure(&src, &dst, &axis);
+ reverse_func.configure(&src, &dst, &axis, use_inverted_axis);
ARM_COMPUTE_ASSERT(src.info()->is_resizable());
ARM_COMPUTE_ASSERT(axis.info()->is_resizable());
@@ -95,8 +104,8 @@ protected:
fill(AccessorType(src));
{
auto axis_data = AccessorType(axis);
- auto axis_v = generate_random_axis();
- std::copy(axis_v.begin(), axis_v.begin() + axis_shape.x(), static_cast<int32_t *>(axis_data.data()));
+ auto axis_v = generate_random_axis(use_negative_axis);
+ std::copy(axis_v.begin(), axis_v.begin() + axis_shape.total_size(), static_cast<int32_t *>(axis_data.data()));
}
// Compute function
@@ -105,24 +114,25 @@ protected:
return dst;
}
- SimpleTensor<T> compute_reference(const TensorShape &shape, const TensorShape &axis_shape, DataType data_type)
+ SimpleTensor<T> compute_reference(const TensorShape &shape, const TensorShape &axis_shape, DataType data_type, bool use_negative_axis, bool use_inverted_axis = false)
{
// Create reference
- SimpleTensor<T> src{ shape, data_type };
- SimpleTensor<uint32_t> axis{ axis_shape, DataType::U32 };
+ SimpleTensor<T> src{ shape, data_type };
+ SimpleTensor<int32_t> axis{ axis_shape, DataType::S32 };
// Fill reference
fill(src);
- auto axis_v = generate_random_axis();
- std::copy(axis_v.begin(), axis_v.begin() + axis_shape.x(), axis.data());
+ auto axis_v = generate_random_axis(use_negative_axis);
+ std::copy(axis_v.begin(), axis_v.begin() + axis_shape.total_size(), axis.data());
- return reference::reverse<T>(src, axis);
+ return reference::reverse<T>(src, axis, use_inverted_axis);
}
TensorType _target{};
SimpleTensor<T> _reference{};
+ unsigned int _num_dims{};
};
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_REVERSE_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_REVERSEFIXTURE_H
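The reverse fixture now also exercises negative axis values and the use_inverted_axis flag, and it trims the candidate axis list to the tensor rank before shuffling. Assuming the usual wrap-around convention for negative indices, the two candidate lists address the same dimensions:

// Hypothetical rank-3 tensor:
//   axis 0 or -3 -> dimension 0 (innermost)
//   axis 1 or -2 -> dimension 1
//   axis 2 or -1 -> dimension 2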
diff --git a/tests/validation/fixtures/ScaleFixture.h b/tests/validation/fixtures/ScaleFixture.h
index b719a22fdf..86d89d71f7 100644
--- a/tests/validation/fixtures/ScaleFixture.h
+++ b/tests/validation/fixtures/ScaleFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,15 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_SCALE_FIXTURE
-#define ARM_COMPUTE_TEST_SCALE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_SCALEFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_SCALEFIXTURE_H
+
+#include "tests/framework/Asserts.h" // Required for ARM_COMPUTE_ASSERT
#include "tests/framework/Fixture.h"
#include "tests/validation/reference/Permute.h"
#include "tests/validation/reference/Scale.h"
@@ -44,23 +39,23 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ScaleValidationGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy,
- bool align_corners, bool mixed_layout)
+ bool align_corners, bool mixed_layout, QuantizationInfo output_quantization_info)
{
- _shape = shape;
- _policy = policy;
- _border_mode = border_mode;
- _sampling_policy = sampling_policy;
- _data_type = data_type;
- _quantization_info = quantization_info;
- _align_corners = align_corners;
- _mixed_layout = mixed_layout;
+ _shape = shape;
+ _policy = policy;
+ _border_mode = border_mode;
+ _sampling_policy = sampling_policy;
+ _data_type = data_type;
+ _input_quantization_info = quantization_info;
+ _output_quantization_info = output_quantization_info;
+ _align_corners = align_corners;
+ _mixed_layout = mixed_layout;
generate_scale(shape);
- std::mt19937 generator(library->seed());
- std::uniform_int_distribution<uint8_t> distribution_u8(0, 255);
+ std::mt19937 generator(library->seed());
+ std::uniform_int_distribution<uint32_t> distribution_u8(0, 255);
_constant_border_value = static_cast<T>(distribution_u8(generator));
_target = compute_target(shape, data_layout);
@@ -144,7 +139,7 @@ protected:
}
// Create tensors
- TensorType src = create_tensor<TensorType>(shape, _data_type, 1, _quantization_info, data_layout);
+ TensorType src = create_tensor<TensorType>(shape, _data_type, 1, _input_quantization_info, data_layout);
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
@@ -152,7 +147,7 @@ protected:
TensorShape shape_scaled(shape);
shape_scaled.set(idx_width, shape[idx_width] * _scale_x, /* apply_dim_correction = */ false);
shape_scaled.set(idx_height, shape[idx_height] * _scale_y, /* apply_dim_correction = */ false);
- TensorType dst = create_tensor<TensorType>(shape_scaled, _data_type, 1, _quantization_info, data_layout);
+ TensorType dst = create_tensor<TensorType>(shape_scaled, _data_type, 1, _output_quantization_info, data_layout);
// Create and configure function
FunctionType scale;
@@ -188,12 +183,12 @@ protected:
SimpleTensor<T> compute_reference(const TensorShape &shape)
{
// Create reference
- SimpleTensor<T> src{ shape, _data_type, 1, _quantization_info };
+ SimpleTensor<T> src{ shape, _data_type, 1, _input_quantization_info };
// Fill reference
fill(src);
- return reference::scale<T>(src, _scale_x, _scale_y, _policy, _border_mode, _constant_border_value, _sampling_policy, /* ceil_policy_scale */ false, _align_corners);
+ return reference::scale<T>(src, _scale_x, _scale_y, _policy, _border_mode, _constant_border_value, _sampling_policy, /* ceil_policy_scale */ false, _align_corners, _output_quantization_info);
}
TensorType _target{};
@@ -204,7 +199,8 @@ protected:
T _constant_border_value{};
SamplingPolicy _sampling_policy{};
DataType _data_type{};
- QuantizationInfo _quantization_info{};
+ QuantizationInfo _input_quantization_info{};
+ QuantizationInfo _output_quantization_info{};
bool _align_corners{ false };
bool _mixed_layout{ false };
float _scale_x{ 1.f };
@@ -215,7 +211,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ScaleValidationQuantizedFixture : public ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy,
bool align_corners)
{
@@ -227,14 +222,34 @@ public:
border_mode,
sampling_policy,
align_corners,
- mixed_layout);
+ mixed_layout,
+ quantization_info);
+ }
+};
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
+class ScaleValidationDifferentOutputQuantizedFixture : public ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape shape, DataType data_type, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, DataLayout data_layout, InterpolationPolicy policy,
+ BorderMode border_mode, SamplingPolicy sampling_policy,
+ bool align_corners)
+ {
+ ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape,
+ data_type,
+ input_quantization_info,
+ data_layout,
+ policy,
+ border_mode,
+ sampling_policy,
+ align_corners,
+ mixed_layout,
+ output_quantization_info);
}
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
class ScaleValidationFixture : public ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy, bool align_corners)
{
ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape,
@@ -245,10 +260,11 @@ public:
border_mode,
sampling_policy,
align_corners,
- mixed_layout);
+ mixed_layout,
+ QuantizationInfo());
}
};
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_SCALE_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_SCALEFIXTURE_H
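With the input and output quantization info now tracked separately, the new ScaleValidationDifferentOutputQuantizedFixture can check that the scale kernels re-quantize on output. A hedged sketch of a binding and of plausible setup values; CLScale and all numbers are assumptions for illustration:

template <typename T>
using CLScaleDifferentOutputQuantizedFixture =
    ScaleValidationDifferentOutputQuantizedFixture<CLTensor, CLAccessor, CLScale, T>;

// setup(shape, DataType::QASYMM8,
//       QuantizationInfo(0.5f, -10) /* input */, QuantizationInfo(0.2f, 20) /* output */,
//       DataLayout::NHWC, InterpolationPolicy::BILINEAR, BorderMode::REPLICATE,
//       SamplingPolicy::CENTER, /* align_corners */ false);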
diff --git a/tests/validation/fixtures/ScatterLayerFixture.h b/tests/validation/fixtures/ScatterLayerFixture.h
new file mode 100644
index 0000000000..af161ef98b
--- /dev/null
+++ b/tests/validation/fixtures/ScatterLayerFixture.h
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_SCATTERLAYERFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_SCATTERLAYERFIXTURE_H
+
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "tests/Globals.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/reference/ScatterLayer.h"
+#include "tests/SimpleTensor.h"
+
+#include <random>
+#include <cstdint>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ScatterGenericValidationFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape src_shape, TensorShape updates_shape, TensorShape indices_shape,
+ TensorShape out_shape, DataType data_type, ScatterInfo scatter_info, bool inplace, bool padding,
+ QuantizationInfo src_qinfo = QuantizationInfo(), QuantizationInfo o_qinfo = QuantizationInfo())
+ {
+ // Derive a hash from the input shapes so the fill seeds vary across test cases.
+ _hash = src_shape[0] + src_shape[1] + src_shape[2] + src_shape[3] + src_shape[4] + src_shape[5]
+ + updates_shape[0] + updates_shape[1] + updates_shape[2] + updates_shape[3]
+ + updates_shape[4] + updates_shape[5]
+ + indices_shape[0] + indices_shape[1] + indices_shape[2] + indices_shape[3];
+
+ _target = compute_target(src_shape, updates_shape, indices_shape, out_shape, data_type, scatter_info, inplace, padding, src_qinfo, o_qinfo);
+ _reference = compute_reference(src_shape, updates_shape, indices_shape, out_shape, data_type, scatter_info, src_qinfo, o_qinfo);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ switch(tensor.data_type())
+ {
+ case DataType::F32:
+ case DataType::F16:
+ {
+ std::uniform_real_distribution<float> distribution(-10.f, 10.f);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::S32:
+ case DataType::S16:
+ case DataType::S8:
+ {
+ std::uniform_int_distribution<int32_t> distribution(-100, 100);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::U32:
+ case DataType::U16:
+ case DataType::U8:
+ {
+ std::uniform_int_distribution<uint32_t> distribution(0, 200);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Unsupported data type.");
+ }
+ }
+ }
+
+ // This is used to fill indices tensor with S32 datatype.
+ // Used to prevent ONLY having values that are out of bounds.
+ template <typename U>
+ void fill_indices(U &&tensor, int i, const TensorShape &shape)
+ {
+ // Calculate the maximum index the shape can contain, plus a small offset (the minimum dimension) so that some out-of-bounds values are also generated.
+ const int32_t max = std::min({shape[0] , shape[1], shape[2]}) + 1;
+ library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(0), static_cast<int32_t>(max));
+ }
+
+ TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c,
+ const TensorShape &out_shape, DataType data_type, const ScatterInfo info, bool inplace, bool padding,
+ QuantizationInfo a_qinfo, QuantizationInfo o_qinfo)
+ {
+ // 1. Create relevant tensors using ScatterInfo data structure.
+ // ----------------------------------------------------
+ // In order - src, updates, indices, output.
+ TensorType src = create_tensor<TensorType>(shape_a, data_type, 1, a_qinfo);
+ TensorType updates = create_tensor<TensorType>(shape_b, data_type, 1, a_qinfo);
+ TensorType indices = create_tensor<TensorType>(shape_c, DataType::S32, 1, QuantizationInfo());
+ TensorType dst = create_tensor<TensorType>(out_shape, data_type, 1, o_qinfo);
+
+ FunctionType scatter;
+
+ // Configure operator
+ // When scatter_info.zero_initialization is true, pass nullptr for src
+ // because dst does not need to be initialized with src values.
+ if(info.zero_initialization)
+ {
+ scatter.configure(nullptr, &updates, &indices, &dst, info);
+ }
+ else
+ {
+ if(inplace)
+ {
+ scatter.configure(&src, &updates, &indices, &src, info);
+ }
+ else
+ {
+ scatter.configure(&src, &updates, &indices, &dst, info);
+ }
+ }
+
+ // Assertions
+ ARM_COMPUTE_ASSERT(src.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(updates.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(indices.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+
+ if(padding)
+ {
+ add_padding_x({ &src, &updates, &indices});
+
+ if(!inplace)
+ {
+ add_padding_x({ &dst });
+ }
+ }
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ updates.allocator()->allocate();
+ indices.allocator()->allocate();
+
+ if(!inplace)
+ {
+ dst.allocator()->allocate();
+ }
+
+ ARM_COMPUTE_ASSERT(!src.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!updates.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!indices.info()->is_resizable());
+
+ if(!inplace)
+ {
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+ }
+
+ // Fill the src, updates and indices tensors.
+ fill(AccessorType(src), 0 + _hash);
+ fill(AccessorType(updates), 1 + _hash);
+ fill_indices(AccessorType(indices), 2 + _hash, out_shape);
+
+ scatter.run();
+
+ if(inplace)
+ {
+ return src;
+ }
+ else
+ {
+ return dst;
+ }
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &a_shape, const TensorShape &b_shape, const TensorShape &c_shape,
+ const TensorShape &out_shape, DataType data_type, ScatterInfo info, QuantizationInfo a_qinfo, QuantizationInfo o_qinfo)
+ {
+ // Output quantization is not currently in use; the fixture should be extended to support it.
+ ARM_COMPUTE_UNUSED(o_qinfo);
+ TensorShape src_shape = a_shape;
+ TensorShape updates_shape = b_shape;
+ TensorShape indices_shape = c_shape;
+ const int num_ind_dims = c_shape.num_dimensions();
+
+ // 1. If necessary, collapse the batch dims of the updates and indices tensors into a single dim.
+ if(num_ind_dims >= 3)
+ {
+ indices_shape = indices_shape.collapsed_from(1);
+ updates_shape = updates_shape.collapsed_from(updates_shape.num_dimensions() - (num_ind_dims - 1)); // Collapse batch dims
+ }
+
+ // 2. Collapse the data dims into a single dim.
+ // Collapse all src dims into 2 dims: the first holds the data, the second is the index we iterate over.
+ src_shape.collapse(updates_shape.num_dimensions() - 1); // Collapse all data dims into single dim.
+ src_shape = src_shape.collapsed_from(1); // Collapse all index dims into a single dim
+ updates_shape.collapse(updates_shape.num_dimensions() - 1); // Collapse data dims (all except last dim which is batch dim)
+
+ // Create reference tensors
+ SimpleTensor<T> src{ src_shape, data_type, 1, a_qinfo };
+ SimpleTensor<T> updates{ updates_shape, data_type, 1, QuantizationInfo() };
+ SimpleTensor<int32_t> indices{ indices_shape, DataType::S32, 1, QuantizationInfo() };
+
+ // Fill reference
+ fill(src, 0 + _hash);
+ fill(updates, 1 + _hash);
+ fill_indices(indices, 2 + _hash, out_shape);
+
+ // Calculate individual reference using collapsed shapes
+ return reference::scatter_layer<T>(src, updates, indices, out_shape, info);
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+ int32_t _hash{};
+};
+
+// This fixture uses the same shape for the updates tensor as for the indices tensor.
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ScatterValidationFixture : public ScatterGenericValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape src_shape, TensorShape update_shape, TensorShape indices_shape,
+ TensorShape out_shape, DataType data_type, ScatterFunction func, bool zero_init, bool inplace, bool padding)
+ {
+ ScatterGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, update_shape,
+ indices_shape, out_shape, data_type, ScatterInfo(func, zero_init), inplace, padding,
+ QuantizationInfo(), QuantizationInfo());
+ }
+};
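+
+// Illustrative usage sketch (not part of the fixture). The alias and test-case arguments below are
+// assumptions for demonstration; the real instantiations live in the backend test suites, e.g.
+// tests/validation/CL/ScatterLayer.cpp:
+//
+//   template <typename T>
+//   using CLScatterLayerFixture = ScatterValidationFixture<CLTensor, CLAccessor, CLScatter, T>;
+//
+//   FIXTURE_DATA_TEST_CASE(RunSmall, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+//                          /* dataset combining shapes, DataType, ScatterFunction, flags... */)
+//   {
+//       validate(CLAccessor(_target), _reference);
+//   }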
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_SCATTERLAYERFIXTURE_H
diff --git a/tests/validation/fixtures/ScharrFixture.h b/tests/validation/fixtures/ScharrFixture.h
index e67d0178b0..b54a9d29e6 100644
--- a/tests/validation/fixtures/ScharrFixture.h
+++ b/tests/validation/fixtures/ScharrFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -66,7 +66,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ScharrValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, BorderMode border_mode, Format format, GradientDimension gradient_dimension)
{
// Generate a random constant value
diff --git a/tests/validation/fixtures/SelectFixture.h b/tests/validation/fixtures/SelectFixture.h
index 7b035574f7..8cb6f062f9 100644
--- a/tests/validation/fixtures/SelectFixture.h
+++ b/tests/validation/fixtures/SelectFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -63,7 +63,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class SelectValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, bool has_same_same_rank, DataType data_type)
{
TensorShape condition_shape = detail::select_condition_shape(shape, has_same_same_rank);
diff --git a/tests/validation/fixtures/SliceOperationsFixtures.h b/tests/validation/fixtures/SliceOperationsFixtures.h
index 267cdd5123..b1f91ea2e0 100644
--- a/tests/validation/fixtures/SliceOperationsFixtures.h
+++ b/tests/validation/fixtures/SliceOperationsFixtures.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class SliceFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, Coordinates starts, Coordinates ends, DataType data_type)
{
_target = compute_target(shape, starts, ends, data_type);
@@ -108,7 +107,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class StridedSliceFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape,
Coordinates starts, Coordinates ends, BiStrides strides,
int32_t begin_mask, int32_t end_mask, int32_t shrink_mask,
diff --git a/tests/validation/fixtures/SoftmaxLayerFixture.h b/tests/validation/fixtures/SoftmaxLayerFixture.h
index 352e215dee..f4bf8df9c0 100644
--- a/tests/validation/fixtures/SoftmaxLayerFixture.h
+++ b/tests/validation/fixtures/SoftmaxLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class SoftmaxValidationGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, float beta, size_t axis)
{
_quantization_info = quantization_info;
@@ -131,7 +130,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class SoftmaxValidationFixture : public SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T, IS_LOG>
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, float beta, size_t axis)
{
SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T, IS_LOG>::setup(shape,
@@ -146,7 +144,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class SoftmaxValidationQuantizedFixture : public SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T, IS_LOG>
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, float beta, size_t axis)
{
SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T, IS_LOG>::setup(shape,
diff --git a/tests/validation/fixtures/SpaceToBatchFixture.h b/tests/validation/fixtures/SpaceToBatchFixture.h
index cfa7775c21..964e511301 100644
--- a/tests/validation/fixtures/SpaceToBatchFixture.h
+++ b/tests/validation/fixtures/SpaceToBatchFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class SpaceToBatchLayerValidationGenericFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape block_shape_shape, TensorShape paddings_shape, TensorShape output_shape,
DataType data_type, DataLayout data_layout, QuantizationInfo quantization_info)
{
@@ -140,7 +139,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class SpaceToBatchLayerValidationFixture : public SpaceToBatchLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape block_shape_shape, TensorShape paddings_shape, TensorShape output_shape,
DataType data_type, DataLayout data_layout)
{
@@ -152,7 +150,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class SpaceToBatchLayerValidationQuantizedFixture : public SpaceToBatchLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape block_shape_shape, TensorShape paddings_shape, TensorShape output_shape,
DataType data_type, DataLayout data_layout, QuantizationInfo quantization_info)
{
diff --git a/tests/validation/fixtures/SpaceToDepthFixture.h b/tests/validation/fixtures/SpaceToDepthFixture.h
index 7448ec546b..2d2e9fad7d 100644
--- a/tests/validation/fixtures/SpaceToDepthFixture.h
+++ b/tests/validation/fixtures/SpaceToDepthFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_TEST_SPACE_TO_DEPTH_LAYER_FIXTURE
#define ARM_COMPUTE_TEST_SPACE_TO_DEPTH_LAYER_FIXTURE
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "tests/Globals.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Fixture.h"
@@ -39,7 +40,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class SpaceToDepthLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape output_shape, const int block_shape, DataType data_type, DataLayout data_layout)
{
_target = compute_target(input_shape, output_shape, block_shape, data_type, data_layout);
@@ -69,6 +69,12 @@ protected:
TensorType input = create_tensor<TensorType>(input_shape, data_type, 1, QuantizationInfo(), data_layout);
TensorType output = create_tensor<TensorType>(output_shape, data_type, 1, QuantizationInfo(), data_layout);
+ auto calc_out_shape = misc::shape_calculator::compute_space_to_depth_shape(input.info(), block_shape);
+ ARM_COMPUTE_ASSERT(output_shape[0] == calc_out_shape[0]);
+ ARM_COMPUTE_ASSERT(output_shape[1] == calc_out_shape[1]);
+ ARM_COMPUTE_ASSERT(output_shape[2] == calc_out_shape[2]);
+ ARM_COMPUTE_ASSERT(output_shape[3] == calc_out_shape[3]);
+
// Create and configure function
FunctionType space_to_depth;
space_to_depth.configure(&input, &output, block_shape);
diff --git a/tests/validation/fixtures/SplitFixture.h b/tests/validation/fixtures/SplitFixture.h
index 3006f21467..203925329c 100644
--- a/tests/validation/fixtures/SplitFixture.h
+++ b/tests/validation/fixtures/SplitFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,7 +47,6 @@ template <typename TensorType, typename ITensorType, typename AccessorType, type
class SplitFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, unsigned int axis, unsigned int splits, DataType data_type)
{
_target = compute_target(shape, axis, splits, data_type);
@@ -149,7 +148,6 @@ template <typename TensorType, typename ITensorType, typename AccessorType, type
class SplitShapesFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, unsigned int axis, std::vector<TensorShape> split_shapes, DataType data_type)
{
_target = compute_target(shape, axis, split_shapes, data_type);
diff --git a/tests/validation/fixtures/StackLayerFixture.h b/tests/validation/fixtures/StackLayerFixture.h
index f729e059b0..7dd8fe47dc 100644
--- a/tests/validation/fixtures/StackLayerFixture.h
+++ b/tests/validation/fixtures/StackLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_STACK_LAYER_FIXTURE
-#define ARM_COMPUTE_TEST_STACK_LAYER_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_STACKLAYERFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_STACKLAYERFIXTURE_H
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorShape.h"
@@ -52,10 +52,9 @@ template <typename TensorType, typename AbstractTensorType, typename AccessorTyp
class StackLayerValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape_src, int axis, DataType data_type, int num_tensors)
{
- _target = compute_target(shape_src, axis, data_type, num_tensors);
+ _target = compute_target(shape_src, axis, data_type, num_tensors, false /* add_x_padding */);
_reference = compute_reference(shape_src, axis, data_type, num_tensors);
}
@@ -66,7 +65,7 @@ protected:
library->fill_tensor_uniform(tensor, i);
}
- TensorType compute_target(TensorShape shape_src, int axis, DataType data_type, int num_tensors)
+ TensorType compute_target(TensorShape shape_src, int axis, DataType data_type, int num_tensors, bool add_x_padding)
{
std::vector<TensorType> tensors(num_tensors);
std::vector<AbstractTensorType *> src(num_tensors);
@@ -91,6 +90,11 @@ protected:
// Allocate and fill the input tensors
for(int i = 0; i < num_tensors; ++i)
{
+ if(add_x_padding)
+ {
+ add_padding_x({&tensors[i]}, DataLayout::NHWC);
+ }
+
ARM_COMPUTE_ASSERT(tensors[i].info()->is_resizable());
tensors[i].allocator()->allocate();
ARM_COMPUTE_ASSERT(!tensors[i].info()->is_resizable());
@@ -99,6 +103,11 @@ protected:
fill(AccessorType(tensors[i]), i);
}
+ if(add_x_padding)
+ {
+ add_padding_x({&dst}, DataLayout::NHWC);
+ }
+
// Allocate output tensor
dst.allocator()->allocate();
@@ -132,7 +141,21 @@ protected:
TensorType _target{};
SimpleTensor<T> _reference{};
};
+
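+// Same as StackLayerValidationFixture, but runs compute_target() with add_x_padding = true so that the
+// input and output tensors carry padding along the X dimension.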
+template <typename TensorType, typename AbstractTensorType, typename AccessorType, typename FunctionType, typename T>
+class StackLayerWithPaddingValidationFixture :
+ public StackLayerValidationFixture<TensorType, AbstractTensorType, AccessorType, FunctionType, T>
+{
+public:
+ using Parent = StackLayerValidationFixture<TensorType, AbstractTensorType, AccessorType, FunctionType, T>;
+
+ void setup(TensorShape shape_src, int axis, DataType data_type, int num_tensors)
+ {
+ Parent::_target = Parent::compute_target(shape_src, axis, data_type, num_tensors, true /* add_x_padding */);
+ Parent::_reference = Parent::compute_reference(shape_src, axis, data_type, num_tensors);
+ }
+};
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_STACK_LAYER_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_STACKLAYERFIXTURE_H
diff --git a/tests/validation/fixtures/TileFixture.h b/tests/validation/fixtures/TileFixture.h
index 35773d9a83..979eee5ab1 100644
--- a/tests/validation/fixtures/TileFixture.h
+++ b/tests/validation/fixtures/TileFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class TileValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type, const Multiples &multiples)
{
_target = compute_target(shape, data_type, multiples);
diff --git a/tests/validation/fixtures/TransposeFixture.h b/tests/validation/fixtures/TransposeFixture.h
index 6f032d4889..212c76cc9a 100644
--- a/tests/validation/fixtures/TransposeFixture.h
+++ b/tests/validation/fixtures/TransposeFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_TRANSPOSE_FIXTURE
-#define ARM_COMPUTE_TEST_TRANSPOSE_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_TRANSPOSEFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_TRANSPOSEFIXTURE_H
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
@@ -32,7 +32,7 @@
#include "tests/IAccessor.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Transpose.h"
+#include "tests/validation/reference/Permute.h"
namespace arm_compute
{
@@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class TransposeValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape shape, DataType data_type)
{
_target = compute_target(shape, data_type);
@@ -98,7 +97,7 @@ protected:
// Fill reference
fill(src);
- return reference::transpose<T>(src);
+ return reference::permute<T>(src, PermutationVector(1U, 0U));
}
TensorType _target{};
@@ -107,4 +106,4 @@ protected:
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_TRANSPOSE_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_TRANSPOSEFIXTURE_H
diff --git a/tests/validation/fixtures/UNIT/DynamicTensorFixture.h b/tests/validation/fixtures/UNIT/DynamicTensorFixture.h
index bdf43050e6..3e96dcbf2d 100644
--- a/tests/validation/fixtures/UNIT/DynamicTensorFixture.h
+++ b/tests/validation/fixtures/UNIT/DynamicTensorFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -127,7 +127,6 @@ class DynamicTensorType3SingleFunction : public framework::Fixture
using T = float;
public:
- template <typename...>
void setup(TensorShape input_level0, TensorShape input_level1)
{
input_l0 = input_level0;
@@ -251,7 +250,6 @@ class DynamicTensorType3ComplexFunction : public framework::Fixture
using T = float;
public:
- template <typename...>
void setup(std::vector<TensorShape> input_shapes, TensorShape weights_shape, TensorShape bias_shape, std::vector<TensorShape> output_shapes, PadStrideInfo info)
{
num_iterations = input_shapes.size();
@@ -390,7 +388,6 @@ class DynamicTensorType2PipelineFunction : public framework::Fixture
using T = float;
public:
- template <typename...>
void setup(std::vector<TensorShape> input_shapes)
{
_data_type = DataType::F32;
diff --git a/tests/validation/fixtures/UNIT/WeightsRetentionFixture.h b/tests/validation/fixtures/UNIT/WeightsRetentionFixture.h
index af9f776ebc..f5e6071340 100644
--- a/tests/validation/fixtures/UNIT/WeightsRetentionFixture.h
+++ b/tests/validation/fixtures/UNIT/WeightsRetentionFixture.h
@@ -74,10 +74,10 @@ protected:
TensorType compute_target()
{
// Create tensors
- TensorType w1 = create_tensor<TensorType>(TensorShape(180000U, 150U), DataType::F32, 1);
- TensorType b1 = create_tensor<TensorType>(TensorShape(150U), DataType::F32, 1);
- TensorType src = create_tensor<TensorType>(TensorShape(1U, 150U, 1200U, _max_batches), DataType::F32, 1);
- TensorType dst = create_tensor<TensorType>(TensorShape(150U, _max_batches), DataType::F32, 1);
+ TensorType w1 = create_tensor<TensorType>(TensorShape(6000U, 15U), DataType::F32, 1);
+ TensorType b1 = create_tensor<TensorType>(TensorShape(15U), DataType::F32, 1);
+ TensorType src = create_tensor<TensorType>(TensorShape(1U, 15U, 400U, _max_batches), DataType::F32, 1);
+ TensorType dst = create_tensor<TensorType>(TensorShape(15U, _max_batches), DataType::F32, 1);
// Create and configure function
FullyConnectedFunction fc_layer_1;
@@ -105,9 +105,9 @@ protected:
int diff = _max_batches - _cur_batches;
auto new_src_padding = PaddingSize(src_padding.top, src_padding.right, src_padding.bottom + diff, src_padding.left);
auto new_dst_padding = PaddingSize(dst_padding.top, dst_padding.right, dst_padding.bottom + diff, dst_padding.left);
- src.allocator()->info().set_tensor_shape(TensorShape(1U, 150U, 1200U, _cur_batches)).set_is_resizable(true).extend_padding(new_src_padding);
+ src.allocator()->info().set_tensor_shape(TensorShape(1U, 15U, 400U, _cur_batches)).set_is_resizable(true).extend_padding(new_src_padding);
src.allocator()->info().set_is_resizable(false);
- dst.allocator()->info().set_tensor_shape(TensorShape(150U, _cur_batches)).set_is_resizable(true).extend_padding(new_dst_padding);
+ dst.allocator()->info().set_tensor_shape(TensorShape(15U, _cur_batches)).set_is_resizable(true).extend_padding(new_dst_padding);
dst.allocator()->info().set_is_resizable(false);
// Configure FC info
@@ -129,16 +129,16 @@ protected:
SimpleTensor<T> compute_reference()
{
// Create reference
- SimpleTensor<T> w1{ TensorShape(180000U, 150U), DataType::F32 };
- SimpleTensor<T> b1{ TensorShape(150U), DataType::F32 };
- SimpleTensor<T> src{ TensorShape(1U, 150U, 1200U, _cur_batches), DataType::F32 };
+ SimpleTensor<T> w1{ TensorShape(6000U, 15U), DataType::F32 };
+ SimpleTensor<T> b1{ TensorShape(15U), DataType::F32 };
+ SimpleTensor<T> src{ TensorShape(1U, 15U, 400U, _cur_batches), DataType::F32 };
// Fill reference
fill(src, 5);
fill(w1, 1);
fill(b1, 2);
- return reference::fully_connected_layer(src, w1, b1, TensorShape(150U, _cur_batches));
+ return reference::fully_connected_layer(src, w1, b1, TensorShape(15U, _cur_batches));
}
protected:
diff --git a/tests/validation/fixtures/UnstackFixture.h b/tests/validation/fixtures/UnstackFixture.h
index 4f8b280735..30b7dd5539 100644
--- a/tests/validation/fixtures/UnstackFixture.h
+++ b/tests/validation/fixtures/UnstackFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,7 +47,6 @@ template <typename TensorType, typename ITensorType, typename AccessorType, type
class UnstackValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, int axis, int num, DataType data_type)
{
_target = compute_target(input_shape, axis, num, data_type);
diff --git a/tests/validation/fixtures/WeightsReshapeFixture.h b/tests/validation/fixtures/WeightsReshapeFixture.h
index 0b3e76d677..68bd8b689d 100644
--- a/tests/validation/fixtures/WeightsReshapeFixture.h
+++ b/tests/validation/fixtures/WeightsReshapeFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,10 +45,9 @@ namespace validation
using namespace arm_compute::misc::shape_calculator;
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class WeightsReshapeValidationFixture : public framework::Fixture
+class WeightsReshapeOpValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, DataType data_type, bool has_bias, unsigned int num_groups)
{
const TensorShape output_shape = compute_weights_reshaped_shape(TensorInfo(input_shape, 1, data_type), has_bias, num_groups);
@@ -73,7 +72,7 @@ protected:
// Create and configure function
FunctionType weights_reshape_func;
- weights_reshape_func.configure(&src, (has_bias ? &bias : nullptr), &dst, num_groups);
+ weights_reshape_func.configure(src.info(), (has_bias ? bias.info() : nullptr), dst.info(), num_groups);
ARM_COMPUTE_ASSERT(src.info()->is_resizable());
ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
@@ -99,8 +98,18 @@ protected:
fill(AccessorType(bias), 1);
}
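+
+ // The operator-level interface binds its tensors at run time through an ITensorPack instead of
+ // pointers captured at configure() time.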
+ arm_compute::ITensorPack pack =
+ {
+ { arm_compute::TensorType::ACL_SRC, &src },
+ { arm_compute::TensorType::ACL_DST, &dst }
+ };
+
+ if(has_bias)
+ {
+ pack.add_const_tensor(arm_compute::TensorType::ACL_BIAS, &bias);
+ }
// Compute function
- weights_reshape_func.run();
+ weights_reshape_func.run(pack);
return dst;
}
diff --git a/tests/validation/fixtures/WinogradConvolutionLayerFixture.h b/tests/validation/fixtures/WinogradConvolutionLayerFixture.h
index e99eb910e8..20b678b36c 100644
--- a/tests/validation/fixtures/WinogradConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/WinogradConvolutionLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_WINOGRAD_LAYER_FIXTURE
-#define ARM_COMPUTE_TEST_WINOGRAD_LAYER_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_WINOGRADCONVOLUTIONLAYERFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_WINOGRADCONVOLUTIONLAYERFIXTURE_H
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
@@ -55,7 +55,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class WinogradConvolutionLayerFastMathValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation,
DataType data_type, ActivationLayerInfo act_info, const DataLayout &data_layout)
@@ -230,7 +229,7 @@ protected:
SimpleTensor<T1> filter_transform_out = reference::winograd_filter_transform<T1>(weights_t1, filter_transform_shape, winograd_info);
SimpleTensor<T1> batched_gemm = reference::gemm<T1>(input_transform_out, filter_transform_out, dummy_c, 1.0f, 0.0f);
SimpleTensor<T1> conv_out = reference::winograd_output_transform<T1>(batched_gemm, bias_t1, output_transform_shape, winograd_info);
- SimpleTensor<T> conv_out_t(std::move(copy_tensor<T, T1>(conv_out)));
+ SimpleTensor<T> conv_out_t(copy_tensor<T, T1>(conv_out));
return (act_info.enabled()) ? reference::activation_layer<T>(conv_out_t, act_info) : conv_out_t;
}
@@ -243,7 +242,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class WinogradInputTransformValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, WinogradInfo winograd_info, DataLayout data_layout, DataType data_type)
{
TensorShape output_shape = compute_winograd_input_transform_shape(TensorInfo(input_shape, 1, data_type), winograd_info);
@@ -355,7 +353,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class WinogradFilterTransformValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, Size2D output_tile, DataLayout data_layout, DataType data_type)
{
WinogradInfo winograd_info(output_tile, Size2D(input_shape[0], input_shape[1]), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */);
@@ -470,7 +467,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class WinogradOutputTransformValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(TensorShape input_shape, WinogradInfo winograd_info, DataType data_type, ActivationLayerInfo act_info = ActivationLayerInfo())
{
_target = compute_target(input_shape, winograd_info, data_type, act_info);
@@ -588,4 +584,4 @@ protected:
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_WINOGRAD_LAYER_FIXTURE */ \ No newline at end of file
+#endif // ACL_TESTS_VALIDATION_FIXTURES_WINOGRADCONVOLUTIONLAYERFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h
new file mode 100644
index 0000000000..ca4de11a15
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/reference/DepthwiseConvolutionLayer.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuDepthwiseConv2dValidationGenericFixture : public framework::Fixture
+{
+public:
+ using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value ||
+ std::is_same<typename std::decay<T>::type, int8_t>::value,
+ int32_t,
+ T>::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T
+
+ void setup(TensorShape input_shape,
+ Size2D kernel_size,
+ const PadStrideInfo &pad_stride,
+ const Size2D &dilation,
+ const unsigned int depth_multiplier,
+ const DataType data_type,
+ const DataLayout data_layout)
+ {
+ ARM_COMPUTE_ERROR_ON(data_layout !=
+ DataLayout::NHWC); // Dynamic fusion depthwise conv2d only supports NHWC layout
+
+ DepthwiseConv2dAttributes dwc_conv2d_attr;
+ const Padding2D padding_2d(pad_stride.pad_left(), pad_stride.pad_right(), pad_stride.pad_top(),
+ pad_stride.pad_bottom());
+ dwc_conv2d_attr.pad(padding_2d)
+ .stride(Size2D(pad_stride.stride().first, pad_stride.stride().second))
+ .dilation(dilation)
+ .depth_multiplier(depth_multiplier)
+ .dimension_rounding_type(pad_stride.round());
+
+ // Calculate Output and Weight Shapes
+ TensorShape weights_shape = TensorShape(kernel_size.width, kernel_size.height);
+
+ const TensorInfo in_info(input_shape, 1, data_type);
+ const TensorInfo we_info(weights_shape, 1, data_type);
+
+ const ConvolutionInfo info{pad_stride, depth_multiplier, ActivationLayerInfo(), dilation};
+ const TensorShape output_shape =
+ misc::shape_calculator::compute_depthwise_convolution_shape(in_info, we_info, info);
+
+ weights_shape.set(2, output_shape.z());
+ const TensorShape bias_shape = TensorShape(weights_shape[2]);
+
+ _data_type = data_type;
+ _data_layout = data_layout;
+ _target = compute_target(input_shape, weights_shape, bias_shape, dwc_conv2d_attr);
+ _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, dwc_conv2d_attr);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ switch (tensor.data_type())
+ {
+ case DataType::F16:
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f};
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::F32:
+ {
+ std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ default:
+ library->fill_tensor_uniform(tensor, i);
+ }
+ }
+
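+ // The target computation below follows the dynamic fusion workflow: build a GpuWorkloadSketch, add
+ // the depthwise conv2d operator and a GpuOutput to it, configure a ClWorkloadRuntime from the sketch,
+ // allocate any auxiliary tensors the runtime requests, then allocate and fill the user tensors and run.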
+ // Given input shapes are in NCHW format
+ TensorType compute_target(TensorShape input_shape,
+ TensorShape weights_shape,
+ const TensorShape &bias_shape,
+ const DepthwiseConv2dAttributes dwc_conv2d_attr)
+ {
+ ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC);
+
+ // Our test shapes are assumed in NCHW data layout, thus the permutation
+ permute(input_shape, PermutationVector(2U, 0U, 1U));
+ permute(weights_shape, PermutationVector(2U, 0U, 1U));
+
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Create sketch tensors
+ ITensorInfo *input_info = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout));
+ ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout));
+ ITensorInfo *bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout));
+ ITensorInfo *dst_info = context.create_tensor_info();
+
+ ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, dwc_conv2d_attr);
+ GpuOutput::create_op(sketch, ans_info, dst_info);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // (Important) Allocate auxiliary tensor memory if there are any
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ }
+
+ // Construct user tensors
+ TensorType t_input{};
+ TensorType t_weight{};
+ TensorType t_bias{};
+ TensorType t_dst{};
+
+ // Initialize user tensors
+ t_input.allocator()->init(*input_info);
+ t_weight.allocator()->init(*weight_info);
+ t_bias.allocator()->init(*bias_info);
+ t_dst.allocator()->init(*dst_info);
+
+ // Allocate and fill user tensors
+ t_input.allocator()->allocate();
+ t_weight.allocator()->allocate();
+ t_bias.allocator()->allocate();
+ t_dst.allocator()->allocate();
+
+ fill(AccessorType(t_input), 0);
+ fill(AccessorType(t_weight), 1);
+ fill(AccessorType(t_bias), 2);
+
+ // Run runtime
+ runtime.run({&t_input, &t_weight, &t_bias, &t_dst});
+ return t_dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &input_shape,
+ const TensorShape &weights_shape,
+ const TensorShape &bias_shape,
+ const TensorShape &output_shape,
+ DepthwiseConv2dAttributes dwc_conv2d_attr)
+ {
+ // Create reference
+ SimpleTensor<T> src{input_shape, _data_type, 1};
+ SimpleTensor<T> weight{weights_shape, _data_type, 1};
+ SimpleTensor<TBias> bias{bias_shape, _data_type, 1};
+
+ fill(src, 0);
+ fill(weight, 1);
+ fill(bias, 2);
+
+ auto src_nchw = src;
+ auto weights_nchw = weight;
+ auto bias_nchw = bias;
+ auto output_shape_nchw = output_shape;
+
+ PadStrideInfo legacy_pad_stride(dwc_conv2d_attr.stride().x(), dwc_conv2d_attr.stride().y(),
+ dwc_conv2d_attr.pad().left, dwc_conv2d_attr.pad().right,
+ dwc_conv2d_attr.pad().top, dwc_conv2d_attr.pad().bottom,
+ DimensionRoundingType{});
+ auto dst_nchw =
+ reference::depthwise_convolution(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, legacy_pad_stride,
+ dwc_conv2d_attr.depth_multiplier(), dwc_conv2d_attr.dilation());
+ return dst_nchw;
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+ DataType _data_type{};
+ DataLayout _data_layout{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuDepthwiseConv2dValidationFixture
+ : public DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape input_shape,
+ Size2D kernel_size,
+ const PadStrideInfo &info,
+ const Size2D &dilation,
+ const unsigned int depth_multiplier,
+ DataType data_type,
+ DataLayout data_layout)
+ {
+ DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ input_shape, kernel_size, info, dilation, depth_multiplier, data_type, data_layout);
+ }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h
new file mode 100644
index 0000000000..1f4e223b93
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/reference/ConvolutionLayer.h"
+#include "tests/validation/reference/Permute.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+template <typename U>
+void fill(U &&tensor, int i)
+{
+ switch (tensor.data_type())
+ {
+ case DataType::F16:
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f};
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::F32:
+ {
+ std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ default:
+ library->fill_tensor_uniform(tensor, i);
+ }
+}
+
+} // namespace
+
+/** General Conv2d fixture
+ * Adapted from tests/validation/fixtures/ConvolutionLayerFixture.h
+ * TODO: Parameterize to be fully backend agnostic: COMPMID-5760; remove Gpu from name
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuConv2dValidationGenericFixture : public framework::Fixture
+{
+public:
+ using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value ||
+ std::is_same<typename std::decay<T>::type, int8_t>::value,
+ int32_t,
+ T>::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T
+
+ void setup(TensorShape input_shape,
+ TensorShape weights_shape,
+ TensorShape bias_shape,
+ TensorShape output_shape,
+ const PadStrideInfo &info,
+ const Size2D &dilation,
+ DataType data_type,
+ DataLayout data_layout,
+ QuantizationInfo quantization_info,
+ QuantizationInfo weight_quantization_info)
+ {
+ ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout
+ const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, dilation);
+ _data_type = data_type;
+ _data_layout = data_layout;
+ _is_quantized = is_data_type_quantized_asymmetric(data_type);
+ _quantization_info = quantization_info;
+ _weight_quantization_info = weight_quantization_info;
+ _bias_data_type = _is_quantized ? DataType::S32 : data_type;
+ _target = compute_target(input_shape, weights_shape, bias_shape, conv2d_attr);
+ _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr);
+ }
+
+protected:
+ // Given input shapes are in NCHW format
+ TensorType compute_target(TensorShape input_shape,
+ TensorShape weights_shape,
+ const TensorShape &bias_shape,
+ Conv2dAttributes conv2d_attr)
+ {
+ ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC);
+ permute(input_shape, PermutationVector(2U, 0U, 1U));
+ permute(weights_shape, PermutationVector(2U, 0U, 1U));
+ CLScheduler::get().default_reinit();
+
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Create sketch tensors
+ ITensorInfo *input_info = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout));
+ ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout));
+ ITensorInfo *bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout));
+ ITensorInfo *dst_info = context.create_tensor_info();
+
+ ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, conv2d_attr);
+ GpuOutput::create_op(sketch, ans_info, dst_info);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+ // (Important) Allocate auxiliary tensor memory if there are any
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ }
+ // Construct user tensors
+ TensorType t_input{};
+ TensorType t_weight{};
+ TensorType t_bias{};
+ TensorType t_dst{};
+
+ // Initialize user tensors
+ t_input.allocator()->init(*input_info);
+ t_weight.allocator()->init(*weight_info);
+ t_bias.allocator()->init(*bias_info);
+ t_dst.allocator()->init(*dst_info);
+
+ // Allocate and fill user tensors
+ t_input.allocator()->allocate();
+ t_weight.allocator()->allocate();
+ t_bias.allocator()->allocate();
+ t_dst.allocator()->allocate();
+
+ fill(AccessorType(t_input), 0);
+ fill(AccessorType(t_weight), 1);
+ fill(AccessorType(t_bias), 2);
+
+ // Run runtime
+ runtime.run({&t_input, &t_weight, &t_bias, &t_dst});
+ return t_dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &input_shape,
+ const TensorShape &weights_shape,
+ const TensorShape &bias_shape,
+ const TensorShape &output_shape,
+ Conv2dAttributes conv2d_attr)
+ {
+ // Create reference
+ SimpleTensor<T> src{input_shape, _data_type, 1, _quantization_info};
+ SimpleTensor<T> weight{weights_shape, _data_type, 1, _weight_quantization_info};
+ SimpleTensor<TBias> bias{bias_shape, _data_type, 1, _quantization_info};
+
+ fill(src, 0);
+ fill(weight, 1);
+ fill(bias, 2);
+
+ auto src_nchw = src;
+ auto weights_nchw = weight;
+ auto bias_nchw = bias;
+ auto output_shape_nchw = output_shape;
+
+ PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left,
+ conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom,
+ DimensionRoundingType{});
+ auto dst_nchw = reference::convolution_layer(src_nchw, weights_nchw, bias_nchw, output_shape_nchw,
+ legacy_pad_stride, conv2d_attr.dilation());
+ return dst_nchw;
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+ DataType _data_type{};
+ DataType _bias_data_type{};
+ DataLayout _data_layout{};
+ QuantizationInfo _quantization_info{};
+ QuantizationInfo _weight_quantization_info{};
+ bool _is_quantized = false;
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuConv2dValidationFixture
+ : public DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape input_shape,
+ TensorShape weights_shape,
+ TensorShape output_shape,
+ TensorShape bias_shape,
+ const PadStrideInfo &info,
+ const Size2D &dilation,
+ DataType data_type,
+ DataLayout data_layout,
+ QuantizationInfo quantization_info)
+ {
+ DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ input_shape, weights_shape, output_shape, bias_shape, info, dilation, data_type, data_layout,
+ quantization_info, quantization_info);
+ }
+};
+
+/** Specific Conv2d method: Direct Conv2d fixture
+ * Adapted from tests/validation/fixtures/DirectConvolutionLayerFixture.h
+ * TODO: Parameterize to be fully backend agnostic: COMPMID-5760
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionDirectConv2dValidationGenericFixture : public framework::Fixture
+{
+public:
+ using TBias =
+ typename std::conditional<std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T>::type;
+
+ void setup(TensorShape input_shape,
+ int stride_x,
+ int stride_y,
+ int pad_x,
+ int pad_y,
+ unsigned int kernel_size,
+ unsigned int num_kernels,
+ DataType data_type,
+ QuantizationInfo quantization_info,
+ DataLayout data_layout)
+ {
+ ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout
+
+ TensorShape weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels);
+ const TensorShape bias_shape(num_kernels);
+ const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR);
+ const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
+ const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, {1U, 1U} /* dilation */);
+
+ TensorInfo input_info = TensorInfo(input_shape, 1, data_type);
+ TensorInfo weights_info = TensorInfo(weights_shape, 1, data_type);
+
+ const TensorShape output_shape =
+ misc::shape_calculator::compute_deep_convolution_shape(input_info, weights_info, info);
+
+ _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr, data_type,
+ bias_data_type, quantization_info, data_layout);
+ _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type,
+ bias_data_type, quantization_info);
+ }
+
+protected:
+ TensorType compute_target(TensorShape input_shape,
+ TensorShape weights_shape,
+ const TensorShape &bias_shape,
+ TensorShape output_shape,
+ const Conv2dAttributes &conv2d_attr,
+ DataType data_type,
+ DataType bias_data_type,
+ QuantizationInfo quantization_info,
+ const DataLayout &data_layout)
+ {
+ ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC);
+ ARM_COMPUTE_UNUSED(quantization_info);
+ // Dataset shapes are in NCHW layout
+ permute(input_shape, PermutationVector(2U, 0U, 1U));
+ permute(weights_shape, PermutationVector(2U, 0U, 1U));
+ permute(output_shape, PermutationVector(2U, 0U, 1U));
+
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Create sketch tensors
+ auto input_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type, data_layout));
+ auto weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, data_type, data_layout));
+ auto bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, bias_data_type, data_layout));
+ auto dst_info = context.create_tensor_info();
+
+ ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, conv2d_attr);
+ GpuOutput::create_op(sketch, ans_info, dst_info);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ }
+ // Construct user tensors
+ TensorType t_input{};
+ TensorType t_weight{};
+ TensorType t_bias{};
+ TensorType t_dst{};
+
+ // Initialize user tensors
+ t_input.allocator()->init(*input_info);
+ t_weight.allocator()->init(*weight_info);
+ t_bias.allocator()->init(*bias_info);
+ t_dst.allocator()->init(*dst_info);
+
+ ARM_COMPUTE_ASSERT(t_input.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(t_weight.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(t_bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(t_dst.info()->is_resizable());
+
+ // Allocate and fill user tensors
+ t_input.allocator()->allocate();
+ t_weight.allocator()->allocate();
+ t_bias.allocator()->allocate();
+ t_dst.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!t_input.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!t_weight.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!t_bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!t_dst.info()->is_resizable());
+
+ fill(AccessorType(t_input), 0);
+ fill(AccessorType(t_weight), 1);
+ fill(AccessorType(t_bias), 2);
+
+ // Run runtime
+ runtime.run({&t_input, &t_weight, &t_bias, &t_dst});
+ return t_dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &input_shape,
+ const TensorShape &weights_shape,
+ const TensorShape &bias_shape,
+ const TensorShape &output_shape,
+ const PadStrideInfo &info,
+ DataType data_type,
+ DataType bias_data_type,
+ QuantizationInfo quantization_info)
+ {
+ // Create reference
+ SimpleTensor<T> src{input_shape, data_type, 1, quantization_info};
+ SimpleTensor<T> weights{weights_shape, data_type, 1, quantization_info};
+ SimpleTensor<TBias> bias{bias_shape, bias_data_type, 1, quantization_info};
+
+ // Fill reference
+ fill(src, 0);
+ fill(weights, 1);
+ fill(bias, 2);
+
+ SimpleTensor<T> dst = reference::convolution_layer<T>(src, weights, bias, output_shape, info);
+ return dst;
+ }
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionDirectConv2dValidationFixture
+ : public DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape input_shape,
+ int stride_x,
+ int stride_y,
+ int pad_x,
+ int pad_y,
+ unsigned int kernel_size,
+ unsigned int num_kernels,
+ DataType data_type,
+ DataLayout data_layout)
+ {
+ DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, QuantizationInfo(),
+ data_layout);
+ }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h
new file mode 100644
index 0000000000..69bd0efbdc
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/reference/ElementwiseOperations.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuElementwiseBinaryValidationGenericFixture : public framework::Fixture
+{
+public:
+ void setup(ArithmeticOperation ref_op,
+ const TensorShape &shape0,
+ const TensorShape &shape1,
+ const TensorShape &shape2,
+ DataType data_type,
+ bool is_inplace,
+ bool fuse_two_ops = false)
+ {
+ _ref_op = ref_op;
+ _is_inplace = is_inplace;
+ _data_type = data_type;
+ _fuse = fuse_two_ops;
+ ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops.");
+ ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet.");
+ _target = compute_target(shape0, shape1, shape2);
+ _reference = compute_reference(shape0, shape1, shape2);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ if (is_data_type_float(tensor.data_type()))
+ {
+ switch (_ref_op)
+ {
+ case ArithmeticOperation::DIV:
+ library->fill_tensor_uniform_ranged(tensor, i, {std::pair<float, float>(-0.001f, 0.001f)});
+ break;
+ case ArithmeticOperation::POWER:
+ library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f);
+ break;
+ default:
+ library->fill_tensor_uniform(tensor, i);
+ }
+ }
+ else if (tensor.data_type() == DataType::S32)
+ {
+ switch (_ref_op)
+ {
+ case ArithmeticOperation::DIV:
+ library->fill_tensor_uniform_ranged(tensor, i, {std::pair<int32_t, int32_t>(-1, 1)});
+ break;
+ default:
+ library->fill_tensor_uniform(tensor, i);
+ }
+ }
+ else
+ {
+ library->fill_tensor_uniform(tensor, i);
+ }
+ }
+
+ TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
+ {
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Create the first element-wise binary op (a second op may be fused onto its result below)
+ ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape0, 1, _data_type));
+ ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape1, 1, _data_type));
+ ITensorInfo *dst_info = context.create_tensor_info();
+
+ ITensorInfo *rhs_info_fuse = nullptr;
+
+ ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info);
+
+ if (_fuse)
+ {
+ rhs_info_fuse = context.create_tensor_info(TensorInfo(shape2, 1, _data_type));
+ ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, rhs_info_fuse);
+ GpuOutput::create_op(sketch, ans2_info, dst_info);
+ }
+ else
+ {
+ GpuOutput::create_op(sketch, ans_info, dst_info);
+ }
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // (Important) Allocate auxiliary tensor memory if there are any
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ }
+
+ // Construct user tensors
+ TensorType t_lhs{};
+ TensorType t_rhs{};
+ TensorType t_rhs_fuse{};
+ TensorType t_dst{};
+
+ // Initialize user tensors
+ t_lhs.allocator()->init(*lhs_info);
+ t_rhs.allocator()->init(*rhs_info);
+ t_dst.allocator()->init(*dst_info);
+ if (_fuse)
+ {
+ t_rhs_fuse.allocator()->init(*rhs_info_fuse);
+ }
+
+ // Allocate and fill user tensors
+ // Instead of using ACL allocator, the user can choose to import memory into the tensors
+ t_lhs.allocator()->allocate();
+ t_rhs.allocator()->allocate();
+ t_dst.allocator()->allocate();
+ if (_fuse)
+ {
+ t_rhs_fuse.allocator()->allocate();
+ }
+
+ fill(AccessorType(t_lhs), 0);
+ fill(AccessorType(t_rhs), 1);
+ if (_fuse)
+ {
+ fill(AccessorType(t_rhs_fuse), 2);
+ }
+
+ // Run runtime
+ if (_fuse)
+ {
+ runtime.run({&t_lhs, &t_rhs, &t_rhs_fuse, &t_dst});
+ }
+ else
+ {
+ runtime.run({&t_lhs, &t_rhs, &t_dst});
+ }
+
+ return t_dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
+ {
+ const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+        const TensorShape out_shape_fuse = _fuse ? TensorShape::broadcast_shape(out_shape, shape2) : out_shape;
+
+ // Create reference
+ SimpleTensor<T> ref_lhs{shape0, _data_type, 1, QuantizationInfo()};
+ SimpleTensor<T> ref_rhs{shape1, _data_type, 1, QuantizationInfo()};
+ SimpleTensor<T> ref_rhs_fuse{shape2, _data_type, 1, QuantizationInfo()};
+ SimpleTensor<T> ref_dst{out_shape, _data_type, 1, QuantizationInfo()};
+ SimpleTensor<T> ref_dst_fuse{out_shape_fuse, _data_type, 1, QuantizationInfo()};
+
+ // Fill reference
+ fill(ref_lhs, 0);
+ fill(ref_rhs, 1);
+
+ reference::arithmetic_operation<T>(_ref_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP);
+ if (_fuse)
+ {
+ fill(ref_rhs_fuse, 2);
+ reference::arithmetic_operation<T>(_ref_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP);
+ }
+        return _fuse ? ref_dst_fuse : ref_dst;
+ }
+
+ ArithmeticOperation _ref_op{ArithmeticOperation::ADD};
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+ DataType _data_type{};
+ DataLayout _data_layout{};
+ bool _is_inplace{false};
+ bool _fuse{false};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuElementwiseBinaryOneOpValidationFixture
+ : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(ArithmeticOperation ref_op, const TensorShape &shape0, DataType data_type, bool is_inplace)
+ {
+ DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ ref_op, shape0, shape0, TensorShape(), data_type, is_inplace);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture
+ : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(ArithmeticOperation ref_op,
+ const TensorShape &shape0,
+ const TensorShape &shape1,
+ DataType data_type,
+ bool is_inplace)
+ {
+ DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ ref_op, shape0, shape1, TensorShape(), data_type, is_inplace);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture
+ : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(ArithmeticOperation ref_op,
+ const TensorShape &shape0,
+ const TensorShape &shape1,
+ const TensorShape &shape2,
+ DataType data_type,
+ bool is_inplace,
+ bool fuse_two_ops)
+ {
+ DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ ref_op, shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops);
+ }
+};
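+
+// A minimal usage sketch for these fixtures (the operator GpuAdd, the dataset contents and the aliases below are
+// illustrative only and are not provided by this header):
+//
+//   template <typename T>
+//   using DynamicFusionCLAddFixture =
+//       DynamicFusionGpuElementwiseBinaryOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>;
+//
+//   FIXTURE_DATA_TEST_CASE(RunSmallOneOp, DynamicFusionCLAddFixture<float>, framework::DatasetMode::ALL,
+//                          combine(framework::dataset::make("Op", ArithmeticOperation::ADD),
+//                                  datasets::SmallShapes(),
+//                                  framework::dataset::make("DataType", DataType::F32),
+//                                  framework::dataset::make("InPlace", false)))
+//   {
+//       validate(CLAccessor(_target), _reference);
+//   }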
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h
new file mode 100644
index 0000000000..4c1cc94d3d
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/GEMM.h"
+#include "tests/validation/reference/Permute.h"
+#include "tests/validation/reference/ReshapeLayer.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+template <typename U>
+void fill(U &&tensor, int i)
+{
+ switch (tensor.data_type())
+ {
+ case DataType::F16:
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f};
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::F32:
+ {
+ std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ default:
+ library->fill_tensor_uniform(tensor, i);
+ }
+}
+
+} // namespace
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuMatMulValidationGenericFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape lhs_shape,
+ TensorShape rhs_shape,
+ TensorShape output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ int M0,
+ int N0,
+ int K0,
+ bool export_rhs_to_cl_image,
+ DataType data_type)
+ {
+        // For brevity, the input shapes are given in their non-transposed form for both the A and B matrices.
+ if (transpose_a)
+ {
+ permute(lhs_shape, PermutationVector(1U, 0U));
+ }
+ if (transpose_b)
+ {
+ permute(rhs_shape, PermutationVector(1U, 0U));
+ }
+
+ // Skip configurations unsupported by the device.
+ _device_supports_export_to_cl_image = image2d_from_buffer_supported(CLKernelLibrary::get().get_device());
+ if (!_device_supports_export_to_cl_image && export_rhs_to_cl_image)
+ {
+ ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+ framework::ARM_COMPUTE_PRINT_INFO();
+ return; // Note: Also need to skip the validate in corresponding FIXTURE_DATA_TEST_CASEs.
+ }
+
+ _target = compute_target(lhs_shape, rhs_shape, transpose_a, transpose_b, M0, N0, K0, export_rhs_to_cl_image,
+ data_type);
+ _reference = compute_reference(lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, data_type);
+ }
+
+protected:
+ TensorType compute_target(TensorShape &shape_a,
+ TensorShape &shape_b,
+ bool transpose_a,
+ bool transpose_b,
+ int M0,
+ int N0,
+ int K0,
+ bool export_rhs_to_cl_image,
+ DataType data_type)
+ {
+ ARM_COMPUTE_UNUSED(export_rhs_to_cl_image);
+ CLScheduler::get().default_reinit();
+
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Create sketch tensors
+ ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape_a, 1, data_type));
+ ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape_b, 1, data_type));
+ ITensorInfo *dst_info = context.create_tensor_info();
+
+ MatMulAttributes matmul_attr{};
+ matmul_attr.adj_lhs(transpose_a);
+ matmul_attr.adj_rhs(transpose_b);
+
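+        // M0 and N0 set the output tile (rows x columns) computed per work-item, while K0 is the accumulation
+        // step along the K dimension.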
+ GpuMatMulSettings matmul_settings{};
+ matmul_settings.m0(M0);
+ matmul_settings.n0(N0);
+ matmul_settings.k0(K0);
+
+ ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info, matmul_attr, matmul_settings);
+ GpuOutput::create_op(sketch, ans_info, dst_info);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
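+        // (Important) Allocate auxiliary tensor memory if there are any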
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ }
+
+ // Construct user tensors
+ TensorType t_lhs{};
+ TensorType t_rhs{};
+ TensorType t_dst{};
+
+ // Initialize user tensors
+ t_lhs.allocator()->init(*lhs_info);
+ t_rhs.allocator()->init(*rhs_info);
+ t_dst.allocator()->init(*dst_info);
+
+ ARM_COMPUTE_ASSERT(t_lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(t_rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(t_dst.info()->is_resizable());
+
+ // Allocate and fill user tensors
+ t_lhs.allocator()->allocate();
+ t_rhs.allocator()->allocate();
+ t_dst.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!t_lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!t_rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!t_dst.info()->is_resizable());
+
+ fill(AccessorType(t_lhs), 0);
+ fill(AccessorType(t_rhs), 1);
+
+ // Run runtime
+ runtime.run({&t_lhs, &t_rhs, &t_dst});
+
+ return t_dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &shape_a,
+ const TensorShape &shape_b,
+ const TensorShape &output_shape,
+ bool pretranspose_a,
+ bool pretranspose_b,
+ DataType data_type)
+ {
+        // We collapse dimensions greater than 2 onto dimension 2, i.e. 4D+ tensors will look like 3D
+        // This is necessary unless we choose to extend the gemm reference to 4D+ tensors
+ TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ);
+ TensorShape shape_a_collapsed = shape_a.collapsed_from(Window::DimZ);
+ TensorShape shape_b_collapsed = shape_b.collapsed_from(Window::DimZ);
+
+ // Create reference
+ SimpleTensor<T> a{shape_a_collapsed, data_type, 1};
+ SimpleTensor<T> b{shape_b_collapsed, data_type, 1};
+ SimpleTensor<T> c{output_shape_collapsed, data_type, 1};
+
+ // Fill reference
+ fill(a, 0);
+ fill(b, 1);
+
+        /* Note: Assuming the usual batch matmul dimensions A = (B x M x K) and B = (B x K x N): if pretranspose_a
+           is true, A is expected to arrive already pre-transposed as (B x K x M), so it is transposed back to
+           (B x M x K) here in order to call the reference implementation, which only accepts (B x M x K) inputs.
+           Similarly, if pretranspose_b is true, B is expected to arrive pre-transposed as (B x N x K) and is
+           transposed back here. */
+
+ // Define transposed shapes
+ TensorShape a_transposed_shape(a.shape());
+ a_transposed_shape.set(0, a.shape().y());
+ a_transposed_shape.set(1, a.shape().x());
+
+ TensorShape b_transposed_shape(b.shape());
+ b_transposed_shape.set(0, b.shape().y());
+ b_transposed_shape.set(1, b.shape().x());
+
+ // Define transposed tensors
+ SimpleTensor<T> a_transposed{a_transposed_shape, data_type};
+ SimpleTensor<T> b_transposed{b_transposed_shape, data_type};
+
+ //pretranspose a if necessary
+ if (pretranspose_a)
+ {
+ a_transposed = reference::permute<T>(a, PermutationVector(1U, 0U));
+ }
+
+ // pretranspose b if necessary
+ if (pretranspose_b)
+ {
+ b_transposed = reference::permute<T>(b, PermutationVector(1U, 0U));
+ }
+
+ // Use transposed tensors if boolean enabled else use original tensors
+ SimpleTensor<T> result =
+ reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, 1.0f, 0.f);
+
+ // We reshape the gemm output back if the tensor is high dimensional
+ if (output_shape_collapsed != output_shape)
+ {
+ result = reference::reshape_layer(result, output_shape);
+ }
+
+ return result;
+ }
+
+    TensorType      _target{};
+ SimpleTensor<T> _reference{};
+ bool _device_supports_export_to_cl_image{false};
+ bool _device_supports_mmul{false};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuMatMulValidationFixture
+ : public DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape lhs_shape,
+ TensorShape rhs_shape,
+ TensorShape output_shape,
+ bool transpose_a,
+ bool transpose_b,
+ int M0,
+ int N0,
+ int K0,
+ bool export_rhs_to_cl_image,
+ DataType data_type)
+ {
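+        // This fixture variant never exports the RHS matrix to a cl_image, so the flag is ignored and false is
+        // passed to the generic fixture below.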
+ ARM_COMPUTE_UNUSED(export_rhs_to_cl_image);
+ DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, M0, N0, K0,
+            false /* export_rhs_to_cl_image */, data_type);
+ }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h
new file mode 100644
index 0000000000..b0c7143d91
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+
+#include "src/dynamic_fusion/utils/Utils.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/PoolingLayer.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuPool2dValidationGenericFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape input_shape, const Pool2dAttributes &pool_attr, DataType data_type)
+ {
+ _target = compute_target(input_shape, pool_attr, data_type);
+ _reference = compute_reference(
+ input_shape, convert_pool_attr_to_pool_info(pool_attr, true /* mixed_precision */), data_type);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ switch (tensor.data_type())
+ {
+ case DataType::F16:
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f};
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ case DataType::F32:
+ {
+ std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, i);
+ break;
+ }
+ default:
+ library->fill_tensor_uniform(tensor, i);
+ }
+ }
+
+    // The given input shape is in NCHW format; it is permuted to NHWC inside compute_target
+ TensorType compute_target(TensorShape input_shape, const Pool2dAttributes &pool_attr, const DataType data_type)
+ {
+ CLScheduler::get().default_reinit();
+
+ // Change shape due to NHWC data layout, test shapes are NCHW
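+        // e.g. a shape stored as (W, H, C, N) for NCHW becomes (C, W, H, N) for NHWC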
+ permute(input_shape, PermutationVector(2U, 0U, 1U));
+
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Create sketch tensors
+ auto input_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type, DataLayout::NHWC));
+ auto dst_info = context.create_tensor_info();
+
+ // Create Pool2dSettings
+ GpuPool2dSettings pool_settings = GpuPool2dSettings();
+
+ ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, pool_attr, pool_settings);
+ GpuOutput::create_op(sketch, ans_info, dst_info);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+ // (Important) Allocate auxiliary tensor memory if there are any
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ }
+ // Construct user tensors
+ TensorType t_input{};
+ TensorType t_dst{};
+
+ // Initialize user tensors
+ t_input.allocator()->init(*input_info);
+ t_dst.allocator()->init(*dst_info);
+
+ // Allocate and fill user tensors
+ t_input.allocator()->allocate();
+ t_dst.allocator()->allocate();
+
+ fill(AccessorType(t_input), 0);
+
+ // Run runtime
+ runtime.run({&t_input, &t_dst});
+ return t_dst;
+ }
+
+ SimpleTensor<T> compute_reference(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type)
+ {
+ // Create reference
+ SimpleTensor<T> src(shape, data_type, 1, QuantizationInfo());
+ // Fill reference
+ fill(src, 0);
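+        // The reference operates on the original NCHW test shape; only the GPU target permutes its input to NHWC.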
+ return reference::pooling_layer<T>(src, pool_info, QuantizationInfo(), nullptr, DataLayout::NCHW);
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuPool2dValidationFixture
+ : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape input_shape,
+ PoolingType pool_type,
+ Size2D pool_size,
+ Padding2D pad,
+ Size2D stride,
+ bool exclude_padding,
+ DataType data_type)
+ {
+ DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ input_shape,
+ Pool2dAttributes().pool_type(pool_type).pool_size(pool_size).pad(pad).stride(stride).exclude_padding(
+ exclude_padding),
+ data_type);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuPool2dSpecialValidationFixture
+ : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape input_shape, Pool2dAttributes pool_attr, DataType data_type)
+ {
+ DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ input_shape, pool_attr, data_type);
+ }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h
new file mode 100644
index 0000000000..c9ffbccbc7
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_ACTIVATIONFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_ACTIVATIONFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/ActivationLayer.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename... TArgs>
+class DynamicFusionActivationValidationFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape shape, bool fuse, DataType data_type, ActivationLayerInfo act_info, TArgs... args)
+ {
+ _fuse = fuse;
+ _data_type = data_type;
+ _function = act_info.activation();
+ _target = compute_target(shape, args...);
+ _reference = compute_reference(shape, act_info);
+ }
+
+protected:
+ std::vector<T> get_boundary_values(T min, T max)
+ {
+        // This function returns a vector filled with the following values, which represent
+        // two partitions derived from equivalence partitioning:
+ // * Lower partition: min, min + delta, lower quarter (nominal), center - delta
+ // * Upper partition: center, center + delta, upper quarter (nominal), max - delta, max
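+        // For example, a float range [-5, 5] gives delta = 0.1, center = 0, quarters = -2.5 and 2.5, i.e. the
+        // boundary values {-5, -4.9, -2.5, -0.1, 0, 0.1, 2.5, 4.9, 5}.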
+ const auto delta = is_data_type_float(_data_type) ? T(0.1f) : T(1);
+ const auto center_value = (min + max) / 2;
+ const auto lower_quarter = (min + center_value) / 2;
+ const auto upper_quarter = (center_value + max) / 2;
+
+ std::vector<T> boundary_values{};
+
+        // To ensure all the inserted values are within the given range after subtracting/adding delta
+ auto insert_values = [&boundary_values, &min, &max](const std::initializer_list<T> &new_values)
+ {
+ for (auto &v : new_values)
+ {
+ if (v >= min && v <= max)
+ {
+ boundary_values.emplace_back(v);
+ }
+ }
+ };
+
+ insert_values({min, static_cast<T>(min + delta), static_cast<T>(lower_quarter),
+ static_cast<T>(center_value - delta)}); // lower partition
+ insert_values({static_cast<T>(center_value), static_cast<T>(center_value + delta),
+ static_cast<T>(upper_quarter), static_cast<T>(max - delta), max}); // upper partition
+
+ return boundary_values;
+ }
+
+ template <typename U>
+ void fill(U &&tensor)
+ {
+ float min_bound = 0;
+ float max_bound = 0;
+ std::tie(min_bound, max_bound) = get_activation_layer_test_bounds<T>(_function, _data_type);
+ library->fill_static_values(tensor, get_boundary_values(static_cast<T>(min_bound), static_cast<T>(max_bound)));
+ }
+
+ TensorType compute_target(const TensorShape &shape, TArgs... args)
+ {
+ // Create a new workload sketch
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Create sketch tensors
+ ITensorInfo *src_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type));
+ ITensorInfo *dst_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type));
+
+ ITensorInfo *ans_0_info = FunctionType::create_op(sketch, src_info, args...);
+ if (_fuse)
+ {
+ ITensorInfo *ans_1_info = FunctionType::create_op(sketch, ans_0_info, args...);
+ GpuOutput::create_op(sketch, ans_1_info, dst_info);
+ }
+ else
+ {
+ GpuOutput::create_op(sketch, ans_0_info, dst_info);
+ }
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // Construct user tensors
+ TensorType t_src{};
+ TensorType t_dst{};
+
+ // Initialize user tensors
+ t_src.allocator()->init(*src_info);
+ t_dst.allocator()->init(*dst_info);
+
+ // Allocate and fill user tensors
+ t_src.allocator()->allocate();
+ t_dst.allocator()->allocate();
+
+ fill(AccessorType(t_src));
+
+ // Run runtime
+ runtime.run({&t_src, &t_dst});
+
+ return t_dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo act_info)
+ {
+ // Create reference
+ SimpleTensor<T> src{shape, _data_type, 1};
+
+ // Fill reference
+ fill(src);
+
+ auto tmp = reference::activation_layer<T>(src, act_info);
+
+ if (_fuse)
+ {
+ auto dst = reference::activation_layer<T>(tmp, act_info);
+ return dst;
+ }
+ else
+ {
+ return tmp;
+ }
+ }
+
+protected:
+ ActivationLayerInfo::ActivationFunction _function{};
+ bool _fuse{false};
+ DataType _data_type{};
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionSigmoidValidationFixture
+ : public DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape shape, bool fuse, DataType data_type)
+ {
+ ActivationLayerInfo act_info{ActivationLayerInfo::ActivationFunction::LOGISTIC};
+ DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, fuse,
+ data_type, act_info);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionTanhValidationFixture
+ : public DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape shape, bool fuse, DataType data_type)
+ {
+ ActivationLayerInfo act_info{ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
+ DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, fuse,
+ data_type, act_info);
+ }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_ACTIVATIONFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h
new file mode 100644
index 0000000000..08fffb305b
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CASTFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CASTFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/DepthConvertLayer.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class DynamicFusionCastValidationFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy)
+ {
+ _target = compute_target(shape, dt_in, dt_out, policy);
+ _reference = compute_reference(shape, dt_in, dt_out, policy);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i, DataType dt_in, DataType dt_out)
+ {
+ // Restricting range to avoid inf values
+ if (dt_out == DataType::F16)
+ {
+ constexpr int signed_min = -32000;
+ constexpr int signed_max = 32000;
+ constexpr int unsigned_min = 0;
+ constexpr int unsigned_max = 65000;
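+            // These bounds stay below the largest finite F16 value (65504) so the cast cannot produce inf.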
+
+ switch (dt_in)
+ {
+ case DataType::U8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::S8:
+ case DataType::F32:
+ {
+ library->fill_tensor_uniform(tensor, i);
+ break;
+ }
+ case DataType::U16:
+ {
+ library->fill_tensor_uniform(tensor, i, static_cast<uint16_t>(unsigned_min),
+ static_cast<uint16_t>(unsigned_max));
+ break;
+ }
+ case DataType::S16:
+ {
+ library->fill_tensor_uniform(tensor, i, static_cast<int16_t>(signed_min),
+ static_cast<int16_t>(signed_max));
+ break;
+ }
+ case DataType::U32:
+ {
+ library->fill_tensor_uniform(tensor, i, static_cast<uint32_t>(unsigned_min),
+ static_cast<uint32_t>(unsigned_max));
+ break;
+ }
+ case DataType::S32:
+ {
+ library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(signed_min),
+ static_cast<int32_t>(signed_max));
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+ }
+ }
+ else
+ {
+ library->fill_tensor_uniform(tensor, i);
+ }
+ }
+
+ // Given input is in nchw format
+ TensorType
+ compute_target(const TensorShape &shape, const DataType dt_in, const DataType dt_out, const ConvertPolicy policy)
+ {
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Create sketch tensors
+ // Here, we use DataLayout::NCHW just for the test. However, the optimal data layout to
+ // be used with dynamic fusion is NHWC
+ ITensorInfo *src_info =
+ context.create_tensor_info(TensorInfo(shape, 1, dt_in, DataLayout::NCHW)); // layout is not important
+ ITensorInfo *dst_info = context.create_tensor_info();
+
+ CastAttributes attributes;
+ attributes.convert_policy(policy).data_type(dt_out);
+
+ ITensorInfo *ans_info = FunctionType::create_op(sketch, src_info, attributes);
+ GpuOutput::create_op(sketch, ans_info, dst_info);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // (Important) Allocate auxiliary tensor memory if there are any
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ }
+
+ // Construct user tensors
+ TensorType t_src{};
+ TensorType t_dst{};
+
+ // Initialize user tensors
+ t_src.allocator()->init(*src_info);
+ t_dst.allocator()->init(*dst_info);
+
+ // Allocate and fill user tensors
+ t_src.allocator()->allocate();
+ t_dst.allocator()->allocate();
+
+ fill(AccessorType(t_src), 0, dt_in, dt_out);
+
+ // Run runtime
+ runtime.run({&t_src, &t_dst});
+ return t_dst;
+ }
+
+ SimpleTensor<T2>
+ compute_reference(const TensorShape &shape, const DataType dt_in, const DataType dt_out, const ConvertPolicy policy)
+ {
+ // Create reference
+ SimpleTensor<T1> src{shape, dt_in, 1};
+
+ // Fill reference
+ fill(src, 0, dt_in, dt_out);
+
+ return reference::depth_convert<T1, T2>(src, dt_out, policy, 0);
+ }
+
+ TensorType _target{};
+ SimpleTensor<T2> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CASTFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h
new file mode 100644
index 0000000000..e8f6f83e42
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/ActivationLayer.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionClampValidationFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape shape, ClampAttributes attributes, bool fuse, DataType data_type)
+ {
+ // CLAMP is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped.
+ ActivationLayerInfo act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() };
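+        // LU_BOUNDED_RELU computes min(alpha, max(beta, x)), so alpha carries the clamp upper bound and beta the lower bound.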
+
+ _fuse = fuse;
+ _attributes = attributes;
+ _data_type = data_type;
+ _target = compute_target(shape, attributes);
+ _reference = compute_reference(shape, act_info);
+ }
+
+protected:
+ std::vector<T> get_boundary_values(T min, T max)
+ {
+        // This function returns a vector filled with the following values, which represent
+        // two partitions derived from equivalence partitioning:
+ // * Lower partition: min, min + delta, lower quarter (nominal), center - delta
+ // * Upper partition: center, center + delta, upper quarter (nominal), max - delta, max
+ const auto delta = is_data_type_float(_data_type) ? T(0.1f) : T(1);
+ const auto center_value = (min + max) / 2;
+ const auto lower_quarter = (min + center_value) / 2;
+ const auto upper_quarter = (center_value + max) / 2;
+
+ std::vector<T> boundary_values{};
+
+        // To ensure all the inserted values are within the given range after subtracting/adding delta
+ auto insert_values = [&boundary_values, &min, &max](const std::initializer_list<T> &new_values)
+ {
+ for(auto &v : new_values)
+ {
+ if(v >= min && v <= max)
+ {
+ boundary_values.emplace_back(v);
+ }
+ }
+ };
+
+ insert_values({ min, static_cast<T>(min + delta), static_cast<T>(lower_quarter), static_cast<T>(center_value - delta) }); // lower partition
+ insert_values({ static_cast<T>(center_value), static_cast<T>(center_value + delta), static_cast<T>(upper_quarter), static_cast<T>(max - delta), max }); // upper partition
+
+ return boundary_values;
+ }
+
+ template <typename U>
+ void fill(U &&tensor)
+ {
+ float min_bound = 0;
+ float max_bound = 0;
+ std::tie(min_bound, max_bound) = get_activation_layer_test_bounds<T>(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, _data_type);
+ library->fill_static_values(tensor, get_boundary_values(static_cast<T>(min_bound), static_cast<T>(max_bound)));
+ }
+
+ TensorType compute_target(const TensorShape &shape, ClampAttributes attributes)
+ {
+ // Create a new workload sketch
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &context };
+
+ // Create sketch tensors
+ ITensorInfo* src_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type));
+ ITensorInfo* dst_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type));
+
+ ITensorInfo *ans_0_info = FunctionType::create_op(sketch, src_info, attributes);
+ if(_fuse)
+ {
+ ITensorInfo *ans_1_info = FunctionType::create_op(sketch, ans_0_info, attributes);
+ GpuOutput::create_op(sketch, ans_1_info, dst_info);
+ }
+ else
+ {
+ GpuOutput::create_op(sketch, ans_0_info, dst_info);
+ }
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // Construct user tensors
+ TensorType t_src{};
+ TensorType t_dst{};
+
+ // Initialize user tensors
+ t_src.allocator()->init(*src_info);
+ t_dst.allocator()->init(*dst_info);
+
+ // Allocate and fill user tensors
+ t_src.allocator()->allocate();
+ t_dst.allocator()->allocate();
+
+ fill(AccessorType(t_src));
+
+ // Run runtime
+ runtime.run({ &t_src, &t_dst });
+
+ return t_dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo act_info)
+ {
+ // Create reference
+ SimpleTensor<T> src{ shape, _data_type, 1, _quantization_info };
+
+ // Fill reference
+ fill(src);
+
+ auto dst = reference::activation_layer<T>(src, act_info, _quantization_info);
+ return dst;
+ }
+
+protected:
+ QuantizationInfo _quantization_info{};
+ ClampAttributes _attributes{};
+ bool _fuse{ false };
+ DataType _data_type{};
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h
new file mode 100644
index 0000000000..f02aa5e36a
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/Globals.h"
+#include "tests/validation/reference/PixelWiseMultiplication.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/* We use a separate test fixture for Multiplication op instead of reusing ElementwiseBinaryFixture to avoid exposing
+ * the internal enum ElementwiseOp to the public utils/TypePrinters.h as required by the data test case macros
+ * to print the test data.
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulValidationFixture : public framework::Fixture
+{
+public:
+ void setup(const TensorShape &shape0,
+ const TensorShape &shape1,
+ const TensorShape &shape2,
+ DataType data_type,
+ bool is_inplace,
+ bool fuse_two_ops = false)
+ {
+ _data_type = data_type;
+ _is_inplace = is_inplace;
+ _fuse = fuse_two_ops;
+ ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops.");
+        ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In-place computation is not supported yet when fusing two ops.");
+ _target = compute_target(shape0, shape1, shape2);
+ _reference = compute_reference(shape0, shape1, shape2);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ library->fill_tensor_uniform(tensor, i);
+ }
+
+ TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
+ {
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+        // Create the first multiplication op (a second one below is fused onto it when _fuse is set)
+ ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape0, 1, _data_type));
+ ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape1, 1, _data_type));
+ ITensorInfo *dst_info = context.create_tensor_info();
+
+ ITensorInfo *rhs_info_fuse = nullptr;
+
+ ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info);
+
+ if (_fuse)
+ {
+ rhs_info_fuse = context.create_tensor_info(TensorInfo(shape2, 1, _data_type));
+ ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, rhs_info_fuse);
+ GpuOutput::create_op(sketch, ans2_info, dst_info);
+ }
+ else
+ {
+ GpuOutput::create_op(sketch, ans_info, dst_info);
+ }
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // (Important) Allocate auxiliary tensor memory if there are any
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ }
+
+ // Construct user tensors
+ TensorType t_lhs{};
+ TensorType t_rhs{};
+ TensorType t_rhs_fuse{};
+ TensorType t_dst{};
+
+ // Initialize user tensors
+ t_lhs.allocator()->init(*lhs_info);
+ t_rhs.allocator()->init(*rhs_info);
+ t_dst.allocator()->init(*dst_info);
+ if (_fuse)
+ {
+ t_rhs_fuse.allocator()->init(*rhs_info_fuse);
+ }
+
+ // Allocate and fill user tensors
+ // Instead of using ACL allocator, the user can choose to import memory into the tensors
+ t_lhs.allocator()->allocate();
+ t_rhs.allocator()->allocate();
+ t_dst.allocator()->allocate();
+ if (_fuse)
+ {
+ t_rhs_fuse.allocator()->allocate();
+ }
+
+ fill(AccessorType(t_lhs), 0);
+ fill(AccessorType(t_rhs), 1);
+ if (_fuse)
+ {
+ fill(AccessorType(t_rhs_fuse), 2);
+ }
+
+ // Run runtime
+ if (_fuse)
+ {
+ runtime.run({&t_lhs, &t_rhs, &t_rhs_fuse, &t_dst});
+ }
+ else
+ {
+ runtime.run({&t_lhs, &t_rhs, &t_dst});
+ }
+
+ return t_dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
+ {
+ // Create reference
+ SimpleTensor<T> ref_lhs{shape0, _data_type, 1, QuantizationInfo()};
+ SimpleTensor<T> ref_rhs{shape1, _data_type, 1, QuantizationInfo()};
+ SimpleTensor<T> ref_rhs_fuse{shape2, _data_type, 1, QuantizationInfo()};
+
+ // Fill reference
+ fill(ref_lhs, 0);
+ fill(ref_rhs, 1);
+ SimpleTensor<T> ref_dst = reference::pixel_wise_multiplication<T, T, T>(
+ ref_lhs, ref_rhs, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, _data_type,
+ QuantizationInfo());
+ if (_fuse)
+ {
+ fill(ref_rhs_fuse, 2);
+ SimpleTensor<T> ref_dst_fuse = reference::pixel_wise_multiplication<T, T, T>(
+ ref_dst, ref_rhs_fuse, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, _data_type,
+ QuantizationInfo());
+ return ref_dst_fuse;
+ }
+ return ref_dst;
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+ DataType _data_type{};
+ bool _is_inplace{false};
+ bool _fuse{false};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulOneOpValidationFixture
+ : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(const TensorShape &shape0, DataType data_type, bool is_inplace)
+ {
+ DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ shape0, shape0, TensorShape(), data_type, is_inplace);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulBroadcastValidationFixture
+ : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, bool is_inplace)
+ {
+ DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ shape0, shape1, TensorShape(), data_type, is_inplace);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulTwoOpsValidationFixture
+ : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(const TensorShape &shape0,
+ const TensorShape &shape1,
+ const TensorShape &shape2,
+ DataType data_type,
+ bool is_inplace,
+ bool fuse_two_ops)
+ {
+ DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops);
+ }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h
new file mode 100644
index 0000000000..bde3360940
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESHAPEFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESHAPEFIXTURE_H
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/ReshapeAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h"
+
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/Globals.h"
+#include "tests/validation/reference/ReshapeLayer.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuReshapeLayerValidationFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type)
+ {
+ _target = compute_target(input_shape, output_shape, data_type);
+ _reference = compute_reference(input_shape, output_shape, data_type);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ library->fill_tensor_uniform(tensor, i);
+ }
+
+ TensorType compute_target(TensorShape &input_shape, TensorShape &output_shape, DataType data_type)
+ {
+        // Check that the input shape can indeed be reshaped to the output one
+ ARM_COMPUTE_ASSERT(input_shape.total_size() == output_shape.total_size());
+
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Create sketch tensors
+ ITensorInfo *src_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type));
+ ITensorInfo *dst_info = context.create_tensor_info(TensorInfo(output_shape, 1, data_type));
+ ReshapeAttributes attributes;
+ attributes.shape(output_shape);
+
+ ITensorInfo *ans_info = FunctionType::create_op(sketch, src_info, attributes);
+ GpuOutput::create_op(sketch, ans_info, dst_info);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // (Important) Allocate auxiliary tensor memory if there are any
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ }
+
+ // Construct user tensors
+ TensorType t_src{};
+ TensorType t_dst{};
+ // Initialize user tensors
+ t_src.allocator()->init(*src_info);
+ t_dst.allocator()->init(*dst_info);
+
+ // Allocate and fill user tensors
+ t_src.allocator()->allocate();
+ t_dst.allocator()->allocate();
+
+ fill(AccessorType(t_src), 0);
+
+ // Run runtime
+ runtime.run({&t_src, &t_dst});
+
+ return t_dst;
+ }
+
+ SimpleTensor<T>
+ compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, DataType data_type)
+ {
+ // Create reference
+ SimpleTensor<T> src{input_shape, data_type};
+
+ // Fill reference
+ fill(src, 0);
+
+ return reference::reshape_layer<T>(src, output_shape);
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESHAPEFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h
new file mode 100644
index 0000000000..711767b66f
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESIZEFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESIZEFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/SimpleTensor.h"
+#include "tests/validation/reference/Permute.h"
+#include "tests/validation/reference/Scale.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionResizeGenericValidationFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape shape,
+ DataType data_type,
+ QuantizationInfo quantization_info,
+ DataLayout data_layout,
+ InterpolationPolicy interpolation_policy,
+ SamplingPolicy sampling_policy,
+ bool align_corners,
+ QuantizationInfo output_quantization_info)
+ {
+ _shape = shape;
+ _interpolation_policy = interpolation_policy;
+ _sampling_policy = sampling_policy;
+ _data_type = data_type;
+ _input_quantization_info = quantization_info;
+ _output_quantization_info = output_quantization_info;
+ _align_corners = align_corners;
+ _data_layout = data_layout;
+
+ ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion resize supports only NHWC layout
+
+ generate_scale(shape);
+
+ std::mt19937 generator(library->seed());
+ std::uniform_int_distribution<uint32_t> distribution_u8(0, 255);
+
+ _target = compute_target(shape);
+ _reference = compute_reference(shape);
+ }
+
+protected:
+ void generate_scale(const TensorShape &shape)
+ {
+ static constexpr float _min_scale{0.25f};
+ static constexpr float _max_scale{3.f};
+
+ constexpr float max_width{8192.0f};
+ constexpr float max_height{6384.0f};
+ constexpr float min_width{1.f};
+ constexpr float min_height{1.f};
+
+ std::mt19937 generator(library->seed());
+ std::uniform_real_distribution<float> distribution_float(_min_scale, _max_scale);
+
+ auto generate = [&](size_t input_size, float min_output, float max_output) -> int
+ {
+ const float generated_scale = distribution_float(generator);
+ const int output_size = static_cast<int>(
+ utility::clamp(static_cast<float>(input_size) * generated_scale, min_output, max_output));
+ return output_size;
+ };
+
+        // The input shape is always given in NCHW layout. NHWC is handled by a permute in compute_target()
+ const int idx_width = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::HEIGHT);
+
+ _output_width = generate(shape[idx_width], min_width, max_width);
+ _output_height = generate(shape[idx_height], min_height, max_height);
+ }
+
+ template <typename U>
+ void fill(U &&tensor)
+ {
+ if (tensor.data_type() == DataType::F32)
+ {
+ std::uniform_real_distribution<float> distribution(-5.0f, 5.0f);
+ library->fill(tensor, distribution, 0);
+ }
+ else if (tensor.data_type() == DataType::F16)
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-5.0f, 5.0f};
+ library->fill(tensor, distribution, 0);
+ }
+ else if (is_data_type_quantized(tensor.data_type()))
+ {
+ std::uniform_int_distribution<> distribution(0, 100);
+ library->fill(tensor, distribution, 0);
+ }
+ else
+ {
+ library->fill_tensor_uniform(tensor, 0);
+ }
+ }
+
+ TensorType compute_target(TensorShape shape)
+ {
+        // Our test shapes are assumed to be in NCHW data layout, hence the permutation
+ permute(shape, PermutationVector(2U, 0U, 1U));
+
+ // Create a new workload sketch
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ // Create sketch tensors
+ ITensorInfo *src_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type, _data_layout));
+ src_info->set_quantization_info(_input_quantization_info);
+ ITensorInfo *dst_info = context.create_tensor_info();
+
+ ResizeAttributes attributes;
+ attributes.align_corners(_align_corners)
+ .sampling_policy(_sampling_policy)
+ .interpolation_policy(_interpolation_policy)
+ .output_width(_output_width)
+ .output_height(_output_height);
+
+ ITensorInfo *scale_result_info = FunctionType::create_op(sketch, src_info, attributes);
+ GpuOutput::create_op(sketch, scale_result_info, dst_info);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // (Important) Allocate auxiliary tensor memory if there are any
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ }
+
+ // Construct user tensors
+ TensorType t_src{};
+ TensorType t_dst{};
+
+ // Initialize user tensors
+ t_src.allocator()->init(*src_info);
+ t_dst.allocator()->init(*dst_info);
+
+ // Allocate and fill user tensors
+ t_src.allocator()->allocate();
+ t_dst.allocator()->allocate();
+
+ fill(AccessorType(t_src));
+
+ // Run runtime
+ runtime.run({&t_src, &t_dst});
+
+ return t_dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &shape)
+ {
+ // Create reference
+ SimpleTensor<T> src{shape, _data_type, 1, _input_quantization_info};
+
+ // Reference code is NCHW, so the input shapes are NCHW
+ const int idx_width = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::HEIGHT);
+
+ const float scale_x = static_cast<float>(_output_width) / shape[idx_width];
+ const float scale_y = static_cast<float>(_output_height) / shape[idx_height];
+
+ // Fill reference
+ fill(src);
+
+ return reference::scale<T>(src, scale_x, scale_y, _interpolation_policy, BorderMode::REPLICATE,
+ static_cast<T>(0), _sampling_policy, /* ceil_policy_scale */ false, _align_corners,
+ _output_quantization_info);
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+ TensorShape _shape{};
+ InterpolationPolicy _interpolation_policy{};
+ SamplingPolicy _sampling_policy{};
+ DataType _data_type{};
+ DataLayout _data_layout{};
+ QuantizationInfo _input_quantization_info{};
+ QuantizationInfo _output_quantization_info{};
+ bool _align_corners{false};
+ int _output_width{0};
+ int _output_height{0};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionResizeValidationFixture
+ : public DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape shape,
+ DataType data_type,
+ DataLayout data_layout,
+ InterpolationPolicy policy,
+ SamplingPolicy sampling_policy,
+ bool align_corners)
+ {
+ DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ shape, data_type, QuantizationInfo(), data_layout, policy, sampling_policy, align_corners,
+ QuantizationInfo());
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
+class DynamicFusionResizeQuantizedValidationFixture
+ : public DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape shape,
+ DataType data_type,
+ QuantizationInfo quantization_info,
+ DataLayout data_layout,
+ InterpolationPolicy policy,
+ SamplingPolicy sampling_policy,
+ bool align_corners)
+ {
+ DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ shape, data_type, quantization_info, data_layout, policy, sampling_policy, align_corners,
+ quantization_info);
+ }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESIZEFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h
new file mode 100644
index 0000000000..175d4ff889
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE_H
+
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/SimpleTensor.h"
+#include "tests/validation/reference/SoftmaxLayer.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionSoftmaxValidationGenericFixture : public framework::Fixture
+{
+public:
+ void setup(TensorShape shape, DataType data_type, float beta, size_t axis, bool is_log)
+ {
+ _reference = compute_reference(shape, data_type, beta, axis, is_log);
+ _target = compute_target(shape, data_type, beta, axis, is_log);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor)
+ {
+ if (tensor.data_type() == DataType::F32)
+ {
+ std::uniform_real_distribution<float> distribution(-10.0f, 10.0f);
+ library->fill(tensor, distribution, 0);
+ }
+ else if (tensor.data_type() == DataType::F16)
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-10.0f, 10.0f};
+ library->fill(tensor, distribution, 0);
+ }
+ else if (!is_data_type_quantized(tensor.data_type()))
+ {
+ std::uniform_int_distribution<> distribution(0, 100);
+ library->fill(tensor, distribution, 0);
+ }
+ else
+ {
+ library->fill_tensor_uniform(tensor, 0);
+ }
+ }
+
+ TensorType compute_target(const TensorShape &shape, DataType data_type, float beta, int32_t axis, bool is_log)
+ {
+ // Create a new workload sketch
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
+
+ SoftmaxAttributes softmax_attr{};
+ softmax_attr.axis(axis).beta(beta).is_log_softmax(is_log);
+ ITensorInfo *src_info = context.create_tensor_info(shape, 1, data_type);
+ ITensorInfo *dst_info = context.create_tensor_info(shape, 1, data_type);
+ FunctionType::create_op(sketch, src_info, dst_info, softmax_attr);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // (Important) Allocate auxiliary tensor memory if there are any
+ // Instead of using ACL allocated memory, the user can choose to import memory into the tensors
+ for (auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL allocated memory
+ }
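+        // A sketch of the import alternative mentioned above (assumption: CLTensorAllocator::import_memory(cl::Buffer)
+        // is available in this build; user_buffer is a hypothetical user-provided cl::Buffer, not part of this fixture):
+        //   tensor->allocator()->init(info, aux_mem_req.alignment);
+        //   tensor->allocator()->import_memory(user_buffer); // Wrap user-owned memory instead of letting ACL allocate it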
+ // Construct user tensors
+ TensorType src{};
+ TensorType dst{};
+
+ // Initialize user tensors
+ src.allocator()->init(*src_info);
+ dst.allocator()->init(*dst_info);
+
+ // Allocate and fill user tensors
+ src.allocator()->allocate();
+ dst.allocator()->allocate();
+ fill(AccessorType(src));
+
+ // Run runtime
+ runtime.run({&src, &dst});
+
+ return dst;
+ }
+
+ SimpleTensor<T>
+ compute_reference(const TensorShape &shape, DataType data_type, float beta, int32_t axis, bool is_log)
+ {
+ // Create reference
+ SimpleTensor<T> src{shape, data_type, 1};
+
+ // Fill reference
+ fill(src);
+
+ return reference::softmax_layer<T>(src, beta, axis, is_log);
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionSoftmaxValidationFixture
+ : public DynamicFusionSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ void setup(TensorShape shape, DataType data_type, float beta, size_t axis, bool is_log)
+ {
+ DynamicFusionSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
+ shape, data_type, beta, axis, is_log);
+ }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE_H
diff --git a/tests/validation/reference/ActivationLayer.cpp b/tests/validation/reference/ActivationLayer.cpp
index 664b969125..2172362bdd 100644
--- a/tests/validation/reference/ActivationLayer.cpp
+++ b/tests/validation/reference/ActivationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020,2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#include "ActivationLayer.h"
#include "arm_compute/core/Types.h"
+
#include "tests/validation/Helpers.h"
namespace arm_compute
@@ -40,7 +41,7 @@ SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo
ARM_COMPUTE_UNUSED(oq_info);
// Create reference
- SimpleTensor<T> dst{ src.shape(), src.data_type(), 1 };
+ SimpleTensor<T> dst{src.shape(), src.data_type(), 1};
// Compute reference
const T a(info.a());
@@ -48,7 +49,7 @@ SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo
#if defined(_OPENMP)
#pragma omp parallel for
#endif /* _OPENMP */
- for(int i = 0; i < src.num_elements(); ++i)
+ for (int i = 0; i < src.num_elements(); ++i)
{
dst[i] = activate_float<T>(src[i], a, b, info.activation());
}
@@ -57,7 +58,8 @@ SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo
}
template <>
-SimpleTensor<uint8_t> activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
+SimpleTensor<uint8_t>
+activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
{
const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info;
@@ -68,7 +70,8 @@ SimpleTensor<uint8_t> activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src
}
template <>
-SimpleTensor<int8_t> activation_layer<int8_t>(const SimpleTensor<int8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
+SimpleTensor<int8_t>
+activation_layer<int8_t>(const SimpleTensor<int8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
{
const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info;
@@ -79,7 +82,8 @@ SimpleTensor<int8_t> activation_layer<int8_t>(const SimpleTensor<int8_t> &src, A
}
template <>
-SimpleTensor<int16_t> activation_layer<int16_t>(const SimpleTensor<int16_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
+SimpleTensor<int16_t>
+activation_layer<int16_t>(const SimpleTensor<int16_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
{
const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info;
@@ -88,9 +92,14 @@ SimpleTensor<int16_t> activation_layer<int16_t>(const SimpleTensor<int16_t> &src
SimpleTensor<int16_t> dst = convert_to_symmetric<int16_t>(dst_tmp, dst_qinfo);
return dst;
}
-template SimpleTensor<int32_t> activation_layer(const SimpleTensor<int32_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
-template SimpleTensor<float> activation_layer(const SimpleTensor<float> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
-template SimpleTensor<half> activation_layer(const SimpleTensor<half> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
+template SimpleTensor<int32_t>
+activation_layer(const SimpleTensor<int32_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
+template SimpleTensor<float>
+activation_layer(const SimpleTensor<float> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
+template SimpleTensor<half>
+activation_layer(const SimpleTensor<half> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
+template SimpleTensor<bfloat16>
+activation_layer(const SimpleTensor<bfloat16> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/ActivationLayer.h b/tests/validation/reference/ActivationLayer.h
index 8aad1af63e..7f896bd696 100644
--- a/tests/validation/reference/ActivationLayer.h
+++ b/tests/validation/reference/ActivationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020,2022,2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_ACTIVATION_LAYER_H
-#define ARM_COMPUTE_TEST_ACTIVATION_LAYER_H
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_ACTIVATIONLAYER_H
+#define ACL_TESTS_VALIDATION_REFERENCE_ACTIVATIONLAYER_H
#include "tests/SimpleTensor.h"
#include "tests/validation/Helpers.h"
@@ -40,7 +40,7 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a
{
T ret;
- switch(activation)
+ switch (activation)
{
case ActivationLayerInfo::ActivationFunction::ABS:
ret = std::abs(x);
@@ -61,13 +61,13 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a
ret = std::min<T>(a, std::max<T>(b, x));
break;
case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
- ret = (x > 0) ? x : a * x;
+ ret = x > static_cast<T>(0) ? x : static_cast<T>(a * x);
break;
case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
- ret = std::log(static_cast<T>(1) + std::exp(x));
+ ret = std::log(static_cast<T>(1) + std::exp(static_cast<double>(x)));
break;
case ActivationLayerInfo::ActivationFunction::ELU:
- ret = (x > 0) ? x : a * (std::exp(x) - static_cast<T>(1));
+ ret = x > static_cast<T>(0) ? x : static_cast<T>(a * (std::exp(x) - static_cast<T>(1)));
break;
case ActivationLayerInfo::ActivationFunction::SQRT:
ret = std::sqrt(x);
@@ -82,7 +82,14 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a
ret = x;
break;
case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
- ret = x * ((std::min(std::max(static_cast<T>(x + 3), static_cast<T>(0.0f)), static_cast<T>(6.0f))) * 0.166666667f);
+ ret = x * ((std::min(std::max(static_cast<T>(x + 3), static_cast<T>(0.0f)), static_cast<T>(6.0f))) *
+ 0.166666667f);
+ break;
+ case ActivationLayerInfo::ActivationFunction::SWISH:
+ ret = static_cast<T>(x) / (static_cast<T>(1) + std::exp(-a * x));
+ break;
+ case ActivationLayerInfo::ActivationFunction::GELU:
+ ret = x * 0.5f * (1 + erf(x / std::sqrt(2.0f)));
break;
default:
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -93,9 +100,11 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a
}
template <typename T>
-SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info = QuantizationInfo());
+SimpleTensor<T> activation_layer(const SimpleTensor<T> &src,
+ ActivationLayerInfo info,
+ const QuantizationInfo &oq_info = QuantizationInfo());
} // namespace reference
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_ACTIVATION_LAYER_H */
+#endif // ACL_TESTS_VALIDATION_REFERENCE_ACTIVATIONLAYER_H
diff --git a/tests/validation/reference/BatchToSpaceLayer.cpp b/tests/validation/reference/BatchToSpaceLayer.cpp
index 404ee73cac..63d121f59b 100644
--- a/tests/validation/reference/BatchToSpaceLayer.cpp
+++ b/tests/validation/reference/BatchToSpaceLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,8 +23,10 @@
*/
#include "BatchToSpaceLayer.h"
+#include "arm_compute/core/Validate.h"
#include "tests/validation/Helpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
namespace arm_compute
{
namespace test
@@ -35,32 +37,37 @@ namespace reference
{
// Batch to Space
template <typename T>
-SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape)
+SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape)
{
- ARM_COMPUTE_ERROR_ON(block_shape[0] <= 0);
- ARM_COMPUTE_ERROR_ON(block_shape[1] <= 0);
- SimpleTensor<T> result(dst_shape, src.data_type());
+ ARM_COMPUTE_ERROR_ON(block_shape[0] < 1);
+ ARM_COMPUTE_ERROR_ON(block_shape[1] < 1);
+ const auto expected_dst_shape = misc::shape_calculator::compute_batch_to_space_shape(DataLayout::NCHW, src.shape(), block_shape[0], block_shape[1], crop_info);
+ ARM_COMPUTE_ERROR_ON(arm_compute::detail::have_different_dimensions(expected_dst_shape, dst_shape, 0));
+ ARM_COMPUTE_UNUSED(expected_dst_shape);
- int in_pos = 0;
- const auto width_in = static_cast<int>(src.shape()[0]);
- const auto height_in = static_cast<int>(src.shape()[1]);
- const auto z_in = static_cast<int>(src.shape()[2]);
- const auto batch_in = static_cast<int>(src.shape()[3]);
+ SimpleTensor<T> result(dst_shape, src.data_type());
+ int out_pos = 0;
+ const auto width_out = static_cast<int>(dst_shape[0]);
+ const auto height_out = static_cast<int>(dst_shape[1]);
+ const auto z_out = static_cast<int>(dst_shape[2]);
+ const auto batch_out = static_cast<int>(dst_shape[3]);
- for(int batch = 0; batch < batch_in; ++batch)
+ for(int batch = 0; batch < batch_out; ++batch)
{
- for(int z = 0; z < z_in; ++z)
+ for(int z = 0; z < z_out; ++z)
{
- for(int y = 0; y < height_in; ++y)
+ for(int y = 0; y < height_out; ++y)
{
- for(int x = 0; x < width_in; ++x)
+ for(int x = 0; x < width_out; ++x)
{
- const int r = src.shape()[3] / (block_shape[0] * block_shape[1]);
- const int out_x = (block_shape[0] * x + (batch / r) % block_shape[0]);
- const int out_y = (block_shape[1] * y + (batch / r) / block_shape[0]);
- const int out_pos = out_x + dst_shape[0] * out_y + z * dst_shape[0] * dst_shape[1] + (batch % r) * dst_shape[0] * dst_shape[1] * dst_shape[2];
- result[out_pos] = src[in_pos];
- ++in_pos;
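+                    // Map the output coordinate (after undoing the crop) back to its source element: the position
+                    // within the block selects which of the block_shape[0] * block_shape[1] interleaved input batches
+                    // the value came from, while dividing by the block size gives the input spatial coordinates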
+ const int x_c = x + crop_info.left;
+ const int y_c = y + crop_info.top;
+ const int in_batch = batch + ((x_c % block_shape[0]) + (y_c % block_shape[1]) * (block_shape[0])) * dst_shape[3];
+ const int in_x = x_c / block_shape[0];
+ const int in_y = y_c / block_shape[1];
+ const int in_pos = in_x + src.shape()[0] * in_y + z * src.shape()[0] * src.shape()[1] + in_batch * src.shape()[0] * src.shape()[1] * src.shape()[2];
+ result[out_pos] = src[in_pos];
+ ++out_pos;
}
}
}
@@ -68,8 +75,8 @@ SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<in
return result;
}
-template SimpleTensor<float> batch_to_space(const SimpleTensor<float> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape);
-template SimpleTensor<half> batch_to_space(const SimpleTensor<half> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape);
+template SimpleTensor<float> batch_to_space(const SimpleTensor<float> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape);
+template SimpleTensor<half> batch_to_space(const SimpleTensor<half> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/BatchToSpaceLayer.h b/tests/validation/reference/BatchToSpaceLayer.h
index 52556cb53f..a37bfc3373 100644
--- a/tests/validation/reference/BatchToSpaceLayer.h
+++ b/tests/validation/reference/BatchToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_H
#define ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_H
+#include "arm_compute/core/Types.h"
#include "tests/SimpleTensor.h"
#include "tests/validation/Helpers.h"
@@ -36,7 +37,7 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape);
+SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/Conv3D.cpp b/tests/validation/reference/Conv3D.cpp
new file mode 100644
index 0000000000..e4010a507a
--- /dev/null
+++ b/tests/validation/reference/Conv3D.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2021, 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Conv3D.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "support/AclRequires.h"
+#include "tests/validation/reference/UtilsQuantizedAsymm.h"
+
+// Source/Destination Tensor shape indices (N D H W C)
+constexpr unsigned int batch_dim = 4u;
+constexpr unsigned int depth_dim = 3u;
+constexpr unsigned int height_dim = 2u;
+constexpr unsigned int width_dim = 1u;
+constexpr unsigned int channel_dim = 0u;
+
+// Weight tensor shape indices (D H W Cin Cout)
+constexpr unsigned int weights_depth_dim = 4u;
+constexpr unsigned int weights_height_dim = 3u;
+constexpr unsigned int weights_width_dim = 2u;
+constexpr unsigned int weights_CHin_dim = 1u;
+constexpr unsigned int weights_CHout_dim = 0u;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+inline bool is_valid_pixel(int i, int min, int max)
+{
+ return (i >= min && i < max);
+}
+
+// Evaluate the weights against an element in a given tensor.
+template < typename T, typename TB, typename std::enable_if < validation::is_floating_point<T>::value && validation::is_floating_point<TB>::value, int >::type = 0 >
+T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch,
+ int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info)
+{
+ ARM_COMPUTE_UNUSED(oq_info);
+
+ const unsigned int weights_width = weights.shape()[weights_width_dim];
+ const unsigned int weights_height = weights.shape()[weights_height_dim];
+ const unsigned int weights_depth = weights.shape()[weights_depth_dim];
+
+ const unsigned int src_channels = src.shape()[channel_dim];
+ const unsigned int src_width = src.shape()[width_dim];
+ const unsigned int src_height = src.shape()[height_dim];
+ const unsigned int src_depth = src.shape()[depth_dim];
+
+ T total(0);
+ for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d)
+ {
+ const int idx_z = z_start + dilation.depth * weight_d;
+ for(unsigned int weight_y = 0; weight_y < weights_height; ++weight_y)
+ {
+ const int idx_y = y_start + dilation.height * weight_y;
+ for(unsigned int weight_x = 0; weight_x < weights_width; ++weight_x)
+ {
+ const int idx_x = x_start + dilation.width * weight_x;
+
+                // Check whether the point lies inside the input volume (skip it if it falls in the padding)
+ const bool is_x_valid = is_valid_pixel(idx_x, 0, src_width);
+ const bool is_y_valid = is_valid_pixel(idx_y, 0, src_height);
+ const bool is_z_valid = is_valid_pixel(idx_z, 0, src_depth);
+ const bool is_invalid_pixel = !(is_x_valid && is_y_valid && is_z_valid);
+ if(is_invalid_pixel)
+ {
+ continue;
+ }
+
+ for(unsigned int ch_in = 0; ch_in < src_channels; ++ch_in)
+ {
+ const T *in_ptr = src.data();
+ const T *w_ptr = weights.data();
+
+ const int in_offset = coord2index(src.shape(), Coordinates{ ch_in, idx_x, idx_y, idx_z, batch });
+ const int weight_offset = coord2index(weights.shape(), Coordinates{ ch_out, ch_in, weight_x, weight_y, weight_d });
+ T input_value = in_ptr[in_offset];
+ T weight_value = w_ptr[weight_offset];
+ total += (input_value * weight_value);
+ }
+ }
+ }
+ }
+
+ const TB *b_ptr = bias.data();
+ TB bias_value = b_ptr[ch_out];
+
+ return total + bias_value;
+}
+
+template < typename T, typename TB, ARM_COMPUTE_REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
+T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch,
+ int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info)
+{
+ const unsigned int weights_width = weights.shape()[weights_width_dim];
+ const unsigned int weights_height = weights.shape()[weights_height_dim];
+ const unsigned int weights_depth = weights.shape()[weights_depth_dim];
+
+ const unsigned int src_channels = src.shape()[channel_dim];
+ const unsigned int src_width = src.shape()[width_dim];
+ const unsigned int src_height = src.shape()[height_dim];
+ const unsigned int src_depth = src.shape()[depth_dim];
+
+ const UniformQuantizationInfo iq_info = src.quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
+
+ const int input_offset = -iq_info.offset;
+ const float input_scale = iq_info.scale;
+ int weights_offset = -wq_info.offset;
+ float weights_scale = wq_info.scale;
+ const int output_offset = oq_info.offset;
+ const float output_scale = oq_info.scale;
+
+ int output_multiplier = 0;
+ int output_shift = 0;
+ const float multiplier = input_scale * weights_scale / output_scale;
+ arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
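+    // The float multiplier (input_scale * weights_scale / output_scale) is decomposed into an integer fixed-point
+    // multiplier and a shift so that the final requantization below can be performed with integer-only arithmetic
+    // before the output offset is added and the result is clamped to the output type range.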
+
+ int32_t total(0);
+ for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d)
+ {
+ const int idx_z = z_start + dilation.depth * weight_d;
+ for(unsigned int weight_y = 0; weight_y < weights_height; ++weight_y)
+ {
+ const int idx_y = y_start + dilation.height * weight_y;
+ for(unsigned int weight_x = 0; weight_x < weights_width; ++weight_x)
+ {
+ const int idx_x = x_start + dilation.width * weight_x;
+
+                // Check whether the point lies inside the input volume (skip it if it falls in the padding)
+ const bool is_x_valid = is_valid_pixel(idx_x, 0, src_width);
+ const bool is_y_valid = is_valid_pixel(idx_y, 0, src_height);
+ const bool is_z_valid = is_valid_pixel(idx_z, 0, src_depth);
+ const bool is_invalid_pixel = !(is_x_valid && is_y_valid && is_z_valid);
+ if(is_invalid_pixel)
+ {
+ continue;
+ }
+
+ for(unsigned int ch_in = 0; ch_in < src_channels; ++ch_in)
+ {
+ const T *in_ptr = src.data();
+ const T *w_ptr = weights.data();
+
+ const int in_offset = coord2index(src.shape(), Coordinates{ ch_in, idx_x, idx_y, idx_z, batch });
+ const int weight_offset = coord2index(weights.shape(), Coordinates{ ch_out, ch_in, weight_x, weight_y, weight_d });
+ T input_value = in_ptr[in_offset];
+ T weight_value = w_ptr[weight_offset];
+ total += ((input_value + input_offset) * (weight_value + weights_offset));
+ }
+ }
+ }
+ }
+
+ const TB *b_ptr = bias.data();
+ TB bias_value = b_ptr[ch_out];
+
+ total += bias_value;
+
+ return validation::quantize_down_scale_by_fixedpoint(total, output_multiplier, output_shift, output_offset,
+ std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max());
+}
+} // namespace
+
+template <typename T, typename TB>
+SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const Conv3dInfo &conv3d_info)
+{
+ // Compute reference
+ const unsigned int batch_size = src.shape()[batch_dim];
+ const unsigned int dst_width = dst.shape()[width_dim];
+ const unsigned int dst_height = dst.shape()[height_dim];
+ const unsigned int dst_depth = dst.shape()[depth_dim];
+ const unsigned int src_channels = src.shape()[channel_dim];
+ const unsigned int weights_out_ch = weights.shape()[weights_CHout_dim];
+ const unsigned int dst_channels = dst.shape()[channel_dim];
+ const size_t pad_left = conv3d_info.padding.left;
+ const size_t pad_top = conv3d_info.padding.top;
+ const size_t pad_front = conv3d_info.padding.front;
+ const size_t stride_x = conv3d_info.stride.x();
+ const size_t stride_y = conv3d_info.stride.y();
+ const size_t stride_z = conv3d_info.stride.z();
+
+ const TensorShape dst_shape = arm_compute::misc::shape_calculator::compute_conv3d_shape(src.shape(), weights.shape(), conv3d_info);
+
+ ARM_COMPUTE_UNUSED(src_channels, weights_out_ch, dst_channels, dst_shape, weights_CHin_dim);
+ // Number of batches of source and destination tensors must match.
+ ARM_COMPUTE_ERROR_ON(src.shape()[batch_dim] != dst.shape()[batch_dim]);
+ // Input channels in the source and weights must match.
+ ARM_COMPUTE_ERROR_ON(src_channels != weights.shape()[weights_CHin_dim]);
+ // Weight channels in the destination and weights must match.
+ ARM_COMPUTE_ERROR_ON(weights_out_ch != dst_channels);
+ // Bias must match the number of destination channels.
+ ARM_COMPUTE_ERROR_ON(bias.shape()[0] != dst_channels);
+ // Compare given dst tensor shape with expected shape.
+ ARM_COMPUTE_ERROR_ON(dst.shape() != dst_shape);
+
+ for(unsigned int batch = 0; batch < batch_size; ++batch)
+ {
+ for(unsigned int z_out = 0; z_out < dst_depth; ++z_out)
+ {
+ const int z_start = (z_out * stride_z) - pad_front;
+ for(unsigned int y_out = 0; y_out < dst_height; ++y_out)
+ {
+ const int y_start = (y_out * stride_y) - pad_top;
+ for(unsigned int x_out = 0; x_out < dst_width; ++x_out)
+ {
+ const int x_start = (x_out * stride_x) - pad_left;
+ for(unsigned int ch_out = 0; ch_out < dst_channels; ++ch_out)
+ {
+ T *out_ptr = dst.data();
+
+ const int out_offset = coord2index(dst.shape(), Coordinates{ ch_out, x_out, y_out, z_out, batch });
+ out_ptr[out_offset] = calculate_conv3d<T, TB>(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform());
+ }
+ }
+ }
+ }
+ }
+ return dst;
+}
+
+template SimpleTensor<float> conv3d(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, SimpleTensor<float> &dst,
+ const Conv3dInfo &conv3d_info);
+template SimpleTensor<half> conv3d(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, SimpleTensor<half> &dst,
+ const Conv3dInfo &conv3d_info);
+template SimpleTensor<uint8_t> conv3d(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst,
+ const Conv3dInfo &conv3d_info);
+template SimpleTensor<int8_t> conv3d(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<int8_t> &dst,
+ const Conv3dInfo &conv3d_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Conv3D.h b/tests/validation/reference/Conv3D.h
new file mode 100644
index 0000000000..e3674f4bfb
--- /dev/null
+++ b/tests/validation/reference/Conv3D.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_CONV3D_LAYER_H
+#define ARM_COMPUTE_TEST_CONV3D_LAYER_H
+
+#include "Utils.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T, typename TB>
+SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst,
+ const Conv3dInfo &conv3d_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_CONV3D_LAYER_H */
diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h
index 1666e3857b..b67e88e839 100644
--- a/tests/validation/reference/Convolution3d.h
+++ b/tests/validation/reference/Convolution3d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
#define ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "support/Requires.h"
+#include "support/AclRequires.h"
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/UtilsQuantizedAsymm.h"
diff --git a/tests/validation/reference/DFT.cpp b/tests/validation/reference/DFT.cpp
index fd126c7d73..2b03c270ac 100644
--- a/tests/validation/reference/DFT.cpp
+++ b/tests/validation/reference/DFT.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -400,10 +400,10 @@ SimpleTensor<T> conv2d_dft(const SimpleTensor<T> &src, const SimpleTensor<T> &w,
auto padded_src = pad_layer(src, padding_in);
// Flip weights
- std::vector<uint32_t> axis_v = { 0, 1 };
- SimpleTensor<uint32_t> axis{ TensorShape(2U), DataType::U32 };
+ std::vector<uint32_t> axis_v = { 0, 1 };
+ SimpleTensor<int32_t> axis{ TensorShape(2U), DataType::S32 };
std::copy(axis_v.begin(), axis_v.begin() + axis.shape().x(), axis.data());
- auto flipped_w = reverse(w, axis);
+ auto flipped_w = reverse(w, axis, /* use_inverted_axis */ false);
// Pad weights to have the same size as input
const PaddingList paddings_w = { { 0, src.shape()[0] - 1 }, { 0, src.shape()[1] - 1 } };
diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp
index 94c719ade7..3f88897f8e 100644
--- a/tests/validation/reference/DepthConvertLayer.cpp
+++ b/tests/validation/reference/DepthConvertLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -165,7 +165,7 @@ template SimpleTensor<half> depth_convert(const SimpleTensor<int32_t> &src, Data
template SimpleTensor<float> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
// BFLOAT16
-template SimpleTensor<float> depth_convert(const SimpleTensor<bfloat16> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<bfloat16> depth_convert(const SimpleTensor<bfloat16> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
// F16
template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
@@ -186,6 +186,25 @@ template SimpleTensor<int32_t> depth_convert(const SimpleTensor<float> &src, Dat
template SimpleTensor<half> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
template SimpleTensor<bfloat16> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+// S64
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// U64
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp
index 64a89aa6a0..67d69c2c38 100644
--- a/tests/validation/reference/DequantizationLayer.cpp
+++ b/tests/validation/reference/DequantizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -59,6 +59,12 @@ TOut dequantize(int16_t val, const UniformQuantizationInfo qinfo, DataType dt)
ARM_COMPUTE_UNUSED(dt);
return static_cast<TOut>(dequantize_qsymm16(val, qinfo));
}
+template <typename TOut>
+TOut dequantize(int32_t val, const UniformQuantizationInfo qinfo, DataType dt)
+{
+ ARM_COMPUTE_UNUSED(dt);
+ return static_cast<TOut>(dequantize_s32(val, qinfo));
+}
} // namespace
template <typename TOut, typename TIn>
SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src)
@@ -115,6 +121,7 @@ template SimpleTensor<half> dequantization_layer(const SimpleTensor<int8_t> &src
template SimpleTensor<float> dequantization_layer(const SimpleTensor<int8_t> &src);
template SimpleTensor<half> dequantization_layer(const SimpleTensor<int16_t> &src);
template SimpleTensor<float> dequantization_layer(const SimpleTensor<int16_t> &src);
+template SimpleTensor<float> dequantization_layer(const SimpleTensor<int32_t> &src);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/ElementwiseOperations.cpp b/tests/validation/reference/ElementwiseOperations.cpp
index f22c84e153..edbbab8600 100644
--- a/tests/validation/reference/ElementwiseOperations.cpp
+++ b/tests/validation/reference/ElementwiseOperations.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -74,15 +74,6 @@ T arithm_op(ArithmeticOperation op, T src1, T src2, ConvertPolicy convert_policy
case ArithmeticOperation::DIV:
{
val = (static_cast<intermediate_type>(src1) / static_cast<intermediate_type>(src2));
- if(std::is_integral<T>::value)
- {
- // Implement flooring division
- val = (src2 == 0) ? 0 : val;
- if(static_cast<int32_t>(src1) % static_cast<int32_t>(src2) != 0 && ((src1 < 0) != (src2 < 0)))
- {
- --val;
- }
- }
break;
}
case ArithmeticOperation::POWER:
diff --git a/tests/validation/reference/ElementwiseUnary.cpp b/tests/validation/reference/ElementwiseUnary.cpp
index 5333b53c15..558f9d24fc 100644
--- a/tests/validation/reference/ElementwiseUnary.cpp
+++ b/tests/validation/reference/ElementwiseUnary.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,8 @@
* SOFTWARE.
*/
#include "ElementwiseUnary.h"
-
+#include "tests/validation/Helpers.h"
+#include "utils/TypePrinter.h"
namespace arm_compute
{
namespace test
@@ -32,10 +33,8 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary op)
+SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, SimpleTensor<T> &dst, ElementWiseUnary op)
{
- SimpleTensor<T> dst(src.shape(), src.data_type());
-
for(int i = 0; i < src.num_elements(); ++i)
{
switch(op)
@@ -65,13 +64,107 @@ SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary o
ARM_COMPUTE_ERROR("Not implemented");
}
}
+ return dst;
+}
+template <>
+SimpleTensor<int8_t> elementwise_unary(const SimpleTensor<int8_t> &src, SimpleTensor<int8_t> &dst, ElementWiseUnary op)
+{
+ if(dst.data_type() == DataType::QASYMM8_SIGNED)
+ {
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp(src.shape(), DataType::F32);
+ for(int i = 0; i < src.num_elements(); ++i)
+ {
+ switch(op)
+ {
+ case ElementWiseUnary::RSQRT:
+ if(src_tmp[i] != 0)
+ {
+ dst_tmp[i] = 1.f / std::sqrt(src_tmp[i]);
+ }
+ else
+ {
+                        // rsqrt(0) gives 'inf' so set to the maximum in int8: 127
+                        dst_tmp[i] = (127.0f - dst.quantization_info().uniform().offset) * dst.quantization_info().uniform().scale;
+ }
+ break;
+
+ case ElementWiseUnary::LOG:
+ if(src_tmp[i] != 0)
+ {
+ dst_tmp[i] = std::log(src_tmp[i]);
+ }
+ else
+ {
+                        dst_tmp[i] = (-128.0f - dst.quantization_info().uniform().offset) * dst.quantization_info().uniform().scale;
+ }
+ break;
+
+ default:
+ elementwise_unary(src_tmp, dst_tmp, op);
+ break;
+ }
+ }
+ dst = convert_to_asymmetric<int8_t>(dst_tmp, dst.quantization_info());
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Not implemented");
+ }
+ return dst;
+}
+template <>
+SimpleTensor<uint8_t> elementwise_unary(const SimpleTensor<uint8_t> &src, SimpleTensor<uint8_t> &dst, ElementWiseUnary op)
+{
+ if(dst.data_type() == DataType::QASYMM8)
+ {
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp(src.shape(), DataType::F32);
+ for(int i = 0; i < src.num_elements(); ++i)
+ {
+ switch(op)
+ {
+ case ElementWiseUnary::RSQRT:
+ if(src_tmp[i] != 0)
+ {
+ dst_tmp[i] = 1.f / std::sqrt(src_tmp[i]);
+ }
+ else
+ {
+                        // rsqrt(0) gives 'inf' so set to the maximum in uint8: 255
+                        dst_tmp[i] = (255.0f - dst.quantization_info().uniform().offset) * dst.quantization_info().uniform().scale;
+ }
+ break;
+ case ElementWiseUnary::LOG:
+ if(src_tmp[i] != 0)
+ {
+ dst_tmp[i] = std::log(src_tmp[i]);
+ }
+ else
+ {
+ dst_tmp[i] = -dst.quantization_info().uniform().offset * dst.quantization_info().uniform().scale;
+ }
+ break;
+
+ default:
+ elementwise_unary(src_tmp, dst_tmp, op);
+ break;
+ }
+ }
+ dst = convert_to_asymmetric<uint8_t>(dst_tmp, dst.quantization_info());
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Not implemented");
+ }
return dst;
}
-template SimpleTensor<float> elementwise_unary(const SimpleTensor<float> &src, ElementWiseUnary op);
-template SimpleTensor<half> elementwise_unary(const SimpleTensor<half> &src, ElementWiseUnary op);
-template SimpleTensor<int32_t> elementwise_unary(const SimpleTensor<int32_t> &src, ElementWiseUnary op);
+template SimpleTensor<float> elementwise_unary(const SimpleTensor<float> &src, SimpleTensor<float> &dst, ElementWiseUnary op);
+template SimpleTensor<half> elementwise_unary(const SimpleTensor<half> &src, SimpleTensor<half> &dst, ElementWiseUnary op);
+template SimpleTensor<int32_t> elementwise_unary(const SimpleTensor<int32_t> &src, SimpleTensor<int32_t> &dst, ElementWiseUnary op);
+
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/ElementwiseUnary.h b/tests/validation/reference/ElementwiseUnary.h
index be4a229a5b..ae7a49bce4 100644
--- a/tests/validation/reference/ElementwiseUnary.h
+++ b/tests/validation/reference/ElementwiseUnary.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,7 +35,7 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary op);
+SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, SimpleTensor<T> &dst, ElementWiseUnary op);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp
index 21333958f8..af30e9ee54 100644
--- a/tests/validation/reference/FullyConnectedLayer.cpp
+++ b/tests/validation/reference/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -123,7 +123,7 @@ SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTe
// Create reference
SimpleTensor<T> dst{ TensorShape{ dst_shape }, src.data_type(), 1, out_quant_info };
- // Sanity checks
+ // Health checks
const int num_batch_dimensions = std::max(0, static_cast<int>(dst_shape.num_dimensions()) - 1);
const int num_input_dimensions = src.shape().num_dimensions() - num_batch_dimensions;
const unsigned int linear_input_size = src.shape().total_size_lower(num_input_dimensions);
diff --git a/tests/validation/reference/GEMM.cpp b/tests/validation/reference/GEMM.cpp
index 6b3aa390f0..d513343796 100644
--- a/tests/validation/reference/GEMM.cpp
+++ b/tests/validation/reference/GEMM.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
+#include "tests/validation/reference/ArithmeticOperations.h"
namespace arm_compute
{
@@ -35,10 +36,11 @@ namespace validation
namespace reference
{
template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta)
+SimpleTensor<T>
+gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta)
{
// Create reference
- SimpleTensor<T> dst{ c.shape(), c.data_type(), 1 };
+ SimpleTensor<T> dst{c.shape(), c.data_type(), 1};
// Compute reference
const int M = a.shape().y();
@@ -50,30 +52,47 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S
const int a_stride_z = K * M;
const int a_stride_w = K * M * D;
- const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions
- const int b_stride_w = b.shape().num_dimensions() > 3 ? K * N * D : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions
+ const int b_stride_z =
+ b.shape().num_dimensions() > 2
+ ? N * K
+            : 0; // Do not slide the matrix B along the 3rd dimension in case matrix B has less than 3 dimensions
+ int b_stride_w =
+ b.shape().num_dimensions() > 3
+ ? K * N * D
+ : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions
+
+    // Note: There are 3 gemm types: batched-gemm, multi-gemm, and batched multi-gemm. The third dimension of tensor b is overloaded when tensor b has exactly 3 dimensions:
+    // it can be either the number of batches or the number of multis. Batched-GEMM computation is detected only when the third dimension of the "a" and "c" tensors is 1 and the number of dimensions is 4
+ const bool is_batched_gemm = b.shape().num_dimensions() == 3 && a.shape().num_dimensions() == 4 &&
+ c.shape().num_dimensions() == 4 && a.shape()[2] == 1 && c.shape()[2] == 1;
+
+ // Batched-GEMM
+ if (is_batched_gemm)
+ {
+ b_stride_w = b_stride_z;
+ }
const int c_stride_z = N * M;
const int c_stride_w = N * M * D;
-#if defined(_OPENMP) && !( defined(__arm__) && defined(__ANDROID__))
+#if defined(_OPENMP) && !(defined(__arm__) && defined(__ANDROID__))
#pragma omp parallel for collapse(2)
#endif /* _OPENMP */
- for(int w = 0; w < W; ++w)
+ for (int w = 0; w < W; ++w)
{
- for(int depth = 0; depth < D; ++depth)
+ for (int depth = 0; depth < D; ++depth)
{
const int base_addr_a = depth * a_stride_z + w * a_stride_w;
const int base_addr_b = depth * b_stride_z + w * b_stride_w;
const int base_addr_c = depth * c_stride_z + w * c_stride_w;
- for(int row = 0; row < M; ++row)
+ for (int row = 0; row < M; ++row)
{
- for(int col = 0; col < N; ++col)
+ for (int col = 0; col < N; ++col)
{
T acc(0);
- for(int k = 0; k < K; ++k)
+ for (int k = 0; k < K; ++k)
{
acc += a[base_addr_a + k + row * K] * b[base_addr_b + col + k * N];
}
@@ -89,11 +108,12 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S
}
template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta)
+SimpleTensor<T> gemm_mixed_precision(
+ const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta)
{
// GEMM mixed-precision combines F32 accumulators with F16 multiplications
// Create reference
- SimpleTensor<T> dst{ c.shape(), c.data_type(), 1 };
+ SimpleTensor<T> dst{c.shape(), c.data_type(), 1};
// Compute reference
const int M = a.shape().y();
@@ -105,36 +125,54 @@ SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTenso
const int a_stride_z = K * M;
const int a_stride_w = K * M * D;
- const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions
- const int b_stride_w = b.shape().num_dimensions() > 3 ? K * N * D : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions
+ const int b_stride_z =
+ b.shape().num_dimensions() > 2
+ ? N * K
+ : 0; // Do not slide the matrix B along the 3rd dimension in case matrix B has less than 3 dimensions
+ int b_stride_w =
+ b.shape().num_dimensions() > 3
+ ? K * N * D
+ : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions
+
+ // Note: There are 3 gemm types: batched-gemm, multi-gemm, and batches of multi-gemms. The third dimension of tensor b is overloaded when tensor b has exactly 3 dimensions:
+ // it can be either the number of batches or the number of multis. Batched-GEMM computation is detected only when the third dimension of the "a" and "c" tensors is 1 and their number of dimensions is 4.
+ const bool is_batched_gemm = b.shape().num_dimensions() == 3 && a.shape().num_dimensions() == 4 &&
+ c.shape().num_dimensions() == 4 && a.shape()[2] == 1 && c.shape()[2] == 1;
+
+ // Batched-GEMM
+ if (is_batched_gemm)
+ {
+ b_stride_w = b_stride_z;
+ }
const int c_stride_z = N * M;
const int c_stride_w = N * M * D;
-#if defined(_OPENMP) && !( defined(__arm__) && defined(__ANDROID__))
+#if defined(_OPENMP) && !(defined(__arm__) && defined(__ANDROID__))
#pragma omp parallel for collapse(2)
#endif /* _OPENMP */
- for(int w = 0; w < W; ++w)
+ for (int w = 0; w < W; ++w)
{
- for(int depth = 0; depth < D; ++depth)
+ for (int depth = 0; depth < D; ++depth)
{
const int base_addr_a = depth * a_stride_z + w * a_stride_w;
const int base_addr_b = depth * b_stride_z + w * b_stride_w;
const int base_addr_c = depth * c_stride_z + w * c_stride_w;
- for(int row = 0; row < M; ++row)
+ for (int row = 0; row < M; ++row)
{
- for(int col = 0; col < N; ++col)
+ for (int col = 0; col < N; ++col)
{
float acc(0);
- for(int k = 0; k < K; ++k)
+ for (int k = 0; k < K; ++k)
{
acc += static_cast<float>(a[base_addr_a + k + row * K] * b[base_addr_b + col + k * N]);
}
// Finalize the result: alpha * A * B + beta * C
- dst[base_addr_c + col + row * N] = static_cast<T>(alpha * acc + beta * c[base_addr_c + col + row * N]);
+ dst[base_addr_c + col + row * N] =
+ static_cast<T>(alpha * acc + beta * c[base_addr_c + col + row * N]);
}
}
}
@@ -143,8 +181,21 @@ SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTenso
return dst;
}
+template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
+void gemm_accumulate(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta, SimpleTensor<T> &dst)
+{
+ // Compute reference
+ SimpleTensor<T> dst_gemm = gemm(a, b, c, alpha, beta);
+ reference::arithmetic_operation<T>(reference::ArithmeticOperation::ADD, dst, dst_gemm, dst, ConvertPolicy::SATURATE);
+}
+
+template SimpleTensor<bfloat16> gemm(const SimpleTensor<bfloat16> &a, const SimpleTensor<bfloat16> &b, const SimpleTensor<bfloat16> &c, float alpha, float beta);
template SimpleTensor<float> gemm(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta);
template SimpleTensor<half> gemm(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta);
+
+template void gemm_accumulate(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta, SimpleTensor<float> &dst);
+template void gemm_accumulate(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta, SimpleTensor<half> &dst);
+
template SimpleTensor<half> gemm_mixed_precision(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta);
} // namespace reference
} // namespace validation
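
The batched-GEMM handling introduced above hinges on one stride choice: when B has exactly 3 dimensions while A and C have 4 with a unit third dimension, B's third dimension enumerates batches, so its W stride must fall back to its Z stride. A minimal standalone sketch of that selection, assuming the same K/N/D conventions as the reference gemm(); all names below are illustrative, not library API:

#include <cassert>

struct BStrides
{
    int z;
    int w;
};

// K, N, D: matrix dimensions as in the reference gemm(); dims_*: tensor dimension counts;
// a_dim2 / c_dim2: extent of the third dimension of A and C.
BStrides select_b_strides(int K, int N, int D, int dims_a, int dims_b, int dims_c, int a_dim2, int c_dim2)
{
    BStrides s{};
    s.z = (dims_b > 2) ? N * K : 0;     // B only slides along Z if it has a 3rd dimension
    s.w = (dims_b > 3) ? K * N * D : 0; // ...and along W only if it has a 4th dimension

    // Batched-GEMM: B's 3rd dimension holds the batches, so it must advance with W, not Z.
    const bool is_batched_gemm = (dims_b == 3) && (dims_a == 4) && (dims_c == 4) && (a_dim2 == 1) && (c_dim2 == 1);
    if(is_batched_gemm)
    {
        s.w = s.z;
    }
    return s;
}

int main()
{
    // A: [K=8, M=4, 1, W=2], B: [N=16, K=8, 2], C: [N=16, M=4, 1, W=2] -> batched-GEMM.
    const BStrides s = select_b_strides(8, 16, 1, 4, 3, 4, 1, 1);
    assert(s.w == s.z && s.z == 16 * 8);
    return 0;
}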
diff --git a/tests/validation/reference/GEMM.h b/tests/validation/reference/GEMM.h
index 5feaeda584..1b97570122 100644
--- a/tests/validation/reference/GEMM.h
+++ b/tests/validation/reference/GEMM.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2019, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_GEMM_H
-#define ARM_COMPUTE_TEST_GEMM_H
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_GEMM_H
+#define ACL_TESTS_VALIDATION_REFERENCE_GEMM_H
#include "tests/SimpleTensor.h"
#include "tests/validation/Helpers.h"
@@ -41,8 +41,11 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S
template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta);
+template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
+void gemm_accumulate(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta, SimpleTensor<T> &dst);
+
} // namespace reference
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMM_H */
+#endif // ACL_TESTS_VALIDATION_REFERENCE_GEMM_H
diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp
index 1615b51e73..30c577d850 100644
--- a/tests/validation/reference/GEMMLowp.cpp
+++ b/tests/validation/reference/GEMMLowp.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#include "GEMMLowp.h"
#include "arm_compute/core/Types.h"
+#include "tests/validation/reference/ArithmeticOperations.h"
#include "tests/validation/reference/UtilsQuantizedAsymm.h"
#include "support/ToolchainSupport.h"
@@ -230,6 +231,13 @@ SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, c
return c;
}
+template <typename T_out, typename T_in, typename T_in_1>
+void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<T_out> &dst)
+{
+ SimpleTensor<T_out> dst_gemm = gemmlowp_matrix_multiply_core<T_out, T_in, T_in_1>(a, b, shape_c, a_offset, b_offset);
+ reference::arithmetic_operation<T_out>(reference::ArithmeticOperation::ADD, dst, dst_gemm, dst, ConvertPolicy::SATURATE);
+}
+
// used to validate assembly kernels which don't know anything about offsets
template <typename T1, typename T2, typename T3>
SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c)
@@ -336,6 +344,8 @@ template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<in
std::vector<int32_t> result_shift, int32_t min, int32_t max);
template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
+template void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<int32_t> &dst);
+template void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<int32_t> &dst);
template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c);
template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h
index 99015d71fb..6e471fdad1 100644
--- a/tests/validation/reference/GEMMLowp.h
+++ b/tests/validation/reference/GEMMLowp.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_GEMMLOWP_H
-#define ARM_COMPUTE_TEST_GEMMLOWP_H
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H
+#define ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H
#include "tests/SimpleTensor.h"
#include "tests/validation/Helpers.h"
@@ -38,6 +38,9 @@ namespace reference
template <typename T1, typename T2, typename T3>
SimpleTensor<T1> gemmlowp_matrix_multiply_core(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
+template <typename T1, typename T2, typename T3>
+void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<T1> &dst_);
+
template <typename T1, typename T2, typename T3 = T2>
SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c);
@@ -71,4 +74,4 @@ SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn>
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMMLOWP_H */
+#endif // ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H
diff --git a/tests/validation/reference/Gather.cpp b/tests/validation/reference/Gather.cpp
index 93ac09cf95..c90c04f8cc 100644
--- a/tests/validation/reference/Gather.cpp
+++ b/tests/validation/reference/Gather.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019, 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,27 +39,56 @@ namespace reference
template <typename T>
SimpleTensor<T> gather(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &indices, uint32_t actual_axis)
{
- const auto *indices_ptr = static_cast<const uint32_t *>(indices.data());
const TensorShape dst_shape = arm_compute::misc::shape_calculator::compute_gather_shape(src.shape(), indices.shape(), actual_axis);
SimpleTensor<T> dst(dst_shape, src.data_type());
+ const auto src_ptr = static_cast<const T *>(src.data());
+ const auto indices_ptr = static_cast<const uint32_t *>(indices.data());
+ const auto dst_ptr = static_cast<T *>(dst.data());
+
+ const uint32_t index_limit = src.shape()[actual_axis];
+
Window win;
win.use_tensor_dimensions(dst_shape);
- execute_window_loop(win, [&](const Coordinates & id)
- {
- Coordinates offset;
- for(unsigned int dim = 0; dim < id.num_dimensions(); ++dim)
+
+ execute_window_loop(win, [&](const Coordinates &dst_coords) {
+ const auto dst_addr = coords2index(dst.shape(), dst_coords);
+
+ // Calculate the coordinates of the index value.
+ Coordinates idx_coords;
+
+ for(size_t i = 0; i < indices.shape().num_dimensions(); ++i)
{
- if(dim == actual_axis)
+ idx_coords.set(i, dst_coords[i + actual_axis]);
+ }
+
+ const auto index = indices_ptr[coords2index(indices.shape(), idx_coords)];
+
+ if(index < index_limit)
+ {
+ // Calculate the coordinates of the source data.
+ Coordinates src_coords;
+
+ for(size_t i = 0; i < actual_axis; ++i)
{
- offset.set(dim, indices_ptr[id[dim]]);
+ src_coords.set(i, dst_coords[i]);
}
- else
+
+ src_coords.set(actual_axis, index);
+
+ for(size_t i = actual_axis + 1; i < src.shape().num_dimensions(); ++i)
{
- offset.set(dim, id[dim]);
+ src_coords.set(i, dst_coords[i + indices.shape().num_dimensions() - 1]);
}
+
+ // Copy the data.
+ const auto src_addr = coords2index(src.shape(), src_coords);
+ dst_ptr[dst_addr] = src_ptr[src_addr];
+ }
+ else
+ {
+ dst_ptr[dst_addr] = 0;
}
- *reinterpret_cast<T *>(dst(id)) = *reinterpret_cast<const T *>(src(offset));
});
return dst;
@@ -72,4 +101,4 @@ template SimpleTensor<uint8_t> gather(const SimpleTensor<uint8_t> &src, const Si
} // namespace reference
} // namespace validation
} // namespace test
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
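
The rewritten gather reference above maps each destination coordinate to a source coordinate in three pieces: the leading dimensions before the gathered axis copy across, the gathered axis is replaced by the looked-up index, and the trailing dimensions follow after the index-tensor dimensions; out-of-range indices write zero. A reduced standalone sketch of that mapping, assuming a 2-D source gathered along its row axis with 1-D indices (illustrative only, not the library routine):

#include <cassert>
#include <cstdint>
#include <vector>

std::vector<float> gather_rows(const std::vector<float> &src, int rows, int cols, const std::vector<uint32_t> &indices)
{
    std::vector<float> dst(indices.size() * cols, 0.0f);
    for(size_t i = 0; i < indices.size(); ++i)
    {
        const uint32_t row = indices[i];
        if(row < static_cast<uint32_t>(rows)) // out-of-range rows stay zero, as in the reference
        {
            for(int c = 0; c < cols; ++c)
            {
                dst[i * cols + c] = src[row * cols + c];
            }
        }
    }
    return dst;
}

int main()
{
    const std::vector<float> src = { 0, 1, 2, 10, 11, 12, 20, 21, 22 }; // 3x3, row-major
    const std::vector<uint32_t> idx = { 2, 0, 7 };                      // 7 is out of range
    const auto dst = gather_rows(src, 3, 3, idx);
    assert(dst[0] == 20.0f && dst[3] == 0.0f && dst[6] == 0.0f);
    return 0;
}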
diff --git a/tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp b/tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp
new file mode 100644
index 0000000000..7500560c91
--- /dev/null
+++ b/tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "IndirectConv2dAddressPrecalculation.h"
+
+#include "arm_compute/core/Types.h"
+
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+SimpleTensor<int32_t> indirect_conv2d_addr_precalculation(const TensorShape &shape_conv_src, const TensorShape &shape_conv_wei, const TensorShape &shape_conv_dst, const TensorShape &shape_dst,
+ const PadStrideInfo &conv_info)
+{
+ SimpleTensor<int32_t> out{ shape_dst, DataType::S32 };
+
+ constexpr unsigned int width_idx = 1;
+ constexpr unsigned int heigh_idx = 2;
+
+ const int src_conv_width = static_cast<int32_t>(shape_conv_src[width_idx]); // NHWC
+ const int src_conv_height = static_cast<int32_t>(shape_conv_src[heigh_idx]); // NHWC
+ const int dst_conv_width = static_cast<int32_t>(shape_conv_dst[width_idx]); // NHWC
+ const int wei_conv_width = static_cast<int32_t>(shape_conv_wei[width_idx]); // NHWC
+ const int wei_conv_height = static_cast<int32_t>(shape_conv_wei[heigh_idx]); // NHWC
+ const int dst_width = static_cast<int32_t>(shape_dst[0]);
+ const int dst_height = static_cast<int32_t>(shape_dst[1]);
+ const int dst_batch = static_cast<int32_t>(shape_dst[2]);
+ const int ks = wei_conv_width * wei_conv_height;
+ const int stride_x = static_cast<int32_t>(conv_info.stride().first);
+ const int stride_y = static_cast<int32_t>(conv_info.stride().second);
+ const int pad_left = static_cast<int32_t>(conv_info.pad_left());
+ const int pad_top = static_cast<int32_t>(conv_info.pad_top());
+
+ const int m0 = dst_width / ks;
+
+ for(int z = 0; z < dst_batch; ++z)
+ {
+ for(int y = 0; y < dst_height; ++y)
+ {
+ const int mout = y * m0;
+ for(int ki = 0; ki < ks; ++ki)
+ {
+ const int xk = ki % wei_conv_width;
+ const int yk = ki / wei_conv_width;
+ for(int mi = 0; mi < m0; ++mi)
+ {
+ int xi = ((mout + mi) % dst_conv_width) * stride_x;
+ int yi = ((mout + mi) / dst_conv_width) * stride_y;
+ xi -= pad_left;
+ yi -= pad_top;
+ const int x_s = xi + xk;
+ const int y_s = yi + yk;
+ int my = x_s + y_s * src_conv_width;
+ my = my + z * src_conv_width * src_conv_height;
+ my = x_s >= 0 ? my : -1;
+ my = x_s < src_conv_width ? my : -1;
+ my = y_s >= 0 ? my : -1;
+ my = y_s < src_conv_height ? my : -1;
+
+ const unsigned int addr_out = mi + ki * m0 + y * (dst_width) + z * (dst_width * dst_height);
+ out[addr_out] = my;
+ }
+ }
+ }
+ }
+
+ return out;
+}
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
\ No newline at end of file
diff --git a/tests/validation/reference/IndirectConv2dAddressPrecalculation.h b/tests/validation/reference/IndirectConv2dAddressPrecalculation.h
new file mode 100644
index 0000000000..f4a90dfd9f
--- /dev/null
+++ b/tests/validation/reference/IndirectConv2dAddressPrecalculation.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_H
+#define ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_H
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+SimpleTensor<int32_t> indirect_conv2d_addr_precalculation(const TensorShape &shape_conv_src, const TensorShape &shape_conv_wei, const TensorShape &shape_conv_out, const TensorShape &shape_out,
+ const PadStrideInfo &conv_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_H */
\ No newline at end of file
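
The new indirect-conv2d reference above fills a table that holds, for every output row block and every kernel tap, a flattened NHWC offset into the source image, with -1 marking taps that land in the padding. A minimal standalone sketch of the per-tap offset computation (illustrative only; the real routine also handles the m0 row blocking):

#include <cassert>

int tap_offset(int x_s, int y_s, int batch, int src_w, int src_h)
{
    int my = x_s + y_s * src_w + batch * src_w * src_h;
    if(x_s < 0 || x_s >= src_w || y_s < 0 || y_s >= src_h)
    {
        my = -1; // padding tap: the kernel substitutes zeros for this read
    }
    return my;
}

int main()
{
    // 8x6 source, batch 0: a tap at (3, 2) maps to 3 + 2 * 8 = 19; a tap at x = 8 is past the right edge.
    assert(tap_offset(3, 2, 0, 8, 6) == 19);
    assert(tap_offset(8, 0, 0, 8, 6) == -1);
    return 0;
}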
diff --git a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp
index 0a23fa19bb..a7c8a784d9 100644
--- a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp
+++ b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019, 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -63,6 +63,15 @@ SimpleTensor<T> mean_std_normalization_layer(const SimpleTensor<T> &src, float e
return dst;
}
+template <>
+SimpleTensor<uint8_t> mean_std_normalization_layer(const SimpleTensor<uint8_t> &src, float epsilon)
+{
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp = mean_std_normalization_layer<float>(src_tmp, epsilon);
+ SimpleTensor<uint8_t> dst = convert_to_asymmetric<uint8_t>(dst_tmp, src.quantization_info());
+ return dst;
+}
+
template SimpleTensor<float> mean_std_normalization_layer(const SimpleTensor<float> &src, float epsilon);
template SimpleTensor<half> mean_std_normalization_layer(const SimpleTensor<half> &src, float epsilon);
} // namespace reference
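
The uint8_t specialisation above follows the usual pattern for quantized references: dequantize with the source quantization info, run the float reference, requantize with the destination info (the same pattern appears in the quantized Pooling3d specialisations further down). A small standalone sketch of that round trip, with illustrative scale/offset values rather than the framework's convert_from_asymmetric / convert_to_asymmetric helpers:

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

float dequantize_u8(uint8_t q, float scale, int offset)
{
    return scale * (static_cast<int>(q) - offset);
}

uint8_t quantize_u8(float v, float scale, int offset)
{
    const int q = static_cast<int>(std::lround(v / scale)) + offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main()
{
    const float   scale  = 0.5f;
    const int     offset = 10;
    const uint8_t q      = 30;                  // represents (30 - 10) * 0.5 = 10.0f
    const float   x      = dequantize_u8(q, scale, offset);
    assert(x == 10.0f);
    assert(quantize_u8(x, scale, offset) == q); // lossless round trip for a representable value
    return 0;
}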
diff --git a/tests/validation/reference/Permute.cpp b/tests/validation/reference/Permute.cpp
index 6f122b1bf5..7aa3011d8f 100644
--- a/tests/validation/reference/Permute.cpp
+++ b/tests/validation/reference/Permute.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2019,2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#include "Permute.h"
#include "arm_compute/core/Types.h"
+
#include "tests/validation/Helpers.h"
namespace arm_compute
@@ -42,11 +43,11 @@ SimpleTensor<T> permute(const SimpleTensor<T> &src, PermutationVector perm)
permute(dst_shape, perm);
// Create reference
- SimpleTensor<T> dst{ dst_shape, src.data_type(), src.num_channels(), src.quantization_info() };
+ SimpleTensor<T> dst{dst_shape, src.data_type(), src.num_channels(), src.quantization_info()};
// Compute reference
const uint32_t num_elements = src.num_elements();
- for(uint32_t i = 0; i < num_elements; ++i)
+ for (uint32_t i = 0; i < num_elements; ++i)
{
const Coordinates src_coords = index2coord(src.shape(), i);
Coordinates dst_coords = src_coords;
@@ -58,13 +59,14 @@ SimpleTensor<T> permute(const SimpleTensor<T> &src, PermutationVector perm)
return dst;
}
-template SimpleTensor<int8_t> permute(const SimpleTensor<int8_t> &src, PermutationVector perm);
-template SimpleTensor<uint8_t> permute(const SimpleTensor<uint8_t> &src, PermutationVector perm);
-template SimpleTensor<int16_t> permute(const SimpleTensor<int16_t> &src, PermutationVector perm);
+template SimpleTensor<int8_t> permute(const SimpleTensor<int8_t> &src, PermutationVector perm);
+template SimpleTensor<uint8_t> permute(const SimpleTensor<uint8_t> &src, PermutationVector perm);
+template SimpleTensor<int16_t> permute(const SimpleTensor<int16_t> &src, PermutationVector perm);
template SimpleTensor<uint16_t> permute(const SimpleTensor<uint16_t> &src, PermutationVector perm);
template SimpleTensor<uint32_t> permute(const SimpleTensor<uint32_t> &src, PermutationVector perm);
-template SimpleTensor<float> permute(const SimpleTensor<float> &src, PermutationVector perm);
-template SimpleTensor<half> permute(const SimpleTensor<half> &src, PermutationVector perm);
+template SimpleTensor<float> permute(const SimpleTensor<float> &src, PermutationVector perm);
+template SimpleTensor<half> permute(const SimpleTensor<half> &src, PermutationVector perm);
+template SimpleTensor<bfloat16> permute(const SimpleTensor<bfloat16> &src, PermutationVector perm);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/Pooling3dLayer.cpp b/tests/validation/reference/Pooling3dLayer.cpp
new file mode 100644
index 0000000000..2e8f3a0b92
--- /dev/null
+++ b/tests/validation/reference/Pooling3dLayer.cpp
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Pooling3dLayer.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+using namespace arm_compute::misc::shape_calculator;
+
+template <typename T>
+SimpleTensor<T> pooling_3d_layer_internal(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, SimpleTensor<uint32_t> *indices)
+{
+ TensorShape pooled_shape = compute_pool3d_shape(src.shape(), pool3d_info);
+ SimpleTensor<T> dst{ pooled_shape, src.data_type(), 1 };
+
+ if(indices != nullptr)
+ {
+ *indices = SimpleTensor<uint32_t> { pooled_shape, DataType::U32, 1 };
+ }
+
+ const int idx_channel = 0;
+ const int idx_width = 1;
+ const int idx_height = 2;
+ const int idx_depth = 3;
+ const int idx_batch = 4;
+
+ const int pool_size_width = pool3d_info.is_global_pooling ? src.shape()[idx_width] : pool3d_info.pool_size.width;
+ const int pool_size_height = pool3d_info.is_global_pooling ? src.shape()[idx_height] : pool3d_info.pool_size.height;
+ const int pool_size_depth = pool3d_info.is_global_pooling ? src.shape()[idx_depth] : pool3d_info.pool_size.depth;
+
+ const int pool_stride_width = static_cast<int>(pool3d_info.stride.width);
+ const int pool_stride_height = static_cast<int>(pool3d_info.stride.height);
+ const int pool_stride_depth = static_cast<int>(pool3d_info.stride.depth);
+
+ const int pad_left = static_cast<int>(pool3d_info.padding.left);
+ const int pad_top = static_cast<int>(pool3d_info.padding.top);
+ const int pad_front = static_cast<int>(pool3d_info.padding.front);
+
+ const int pad_right = static_cast<int>(pool3d_info.padding.right);
+ const int pad_bottom = static_cast<int>(pool3d_info.padding.bottom);
+ const int pad_back = static_cast<int>(pool3d_info.padding.back);
+
+ const int num_channels = static_cast<int>(src.shape()[idx_channel]);
+ const int num_batches = static_cast<int>(src.shape()[idx_batch]);
+
+ ARM_COMPUTE_ERROR_ON(num_channels != static_cast<int>(dst.shape()[idx_channel]));
+ ARM_COMPUTE_ERROR_ON(num_batches != static_cast<int>(dst.shape()[idx_batch]));
+
+ const int w_src = static_cast<int>(src.shape()[idx_width]);
+ const int h_src = static_cast<int>(src.shape()[idx_height]);
+ const int d_src = static_cast<int>(src.shape()[idx_depth]);
+ const int w_dst = static_cast<int>(dst.shape()[idx_width]);
+ const int h_dst = static_cast<int>(dst.shape()[idx_height]);
+ const int d_dst = static_cast<int>(dst.shape()[idx_depth]);
+
+ const bool exclude_padding = pool3d_info.exclude_padding;
+
+ const int height_stride_src = num_channels * w_src;
+ const int depth_stride_src = height_stride_src * h_src;
+ const int batch_stride_src = depth_stride_src * d_src;
+ const int height_stride_dst = num_channels * w_dst;
+ const int depth_stride_dst = height_stride_dst * h_dst;
+ const int batch_stride_dst = depth_stride_dst * d_dst;
+
+ for(int b = 0; b < num_batches; ++b)
+ {
+ const int batch_offset_dst = b * batch_stride_dst;
+ const int batch_offset_src = b * batch_stride_src;
+ for(int c = 0; c < num_channels; ++c)
+ {
+ for(int d = 0; d < d_dst; ++d)
+ {
+ const int depth_offset_dst = d * depth_stride_dst;
+ for(int h = 0; h < h_dst; ++h)
+ {
+ const int height_offset_dst = h * height_stride_dst;
+ for(int w = 0; w < w_dst; ++w)
+ {
+ int wstart = w * pool_stride_width - pad_left;
+ int hstart = h * pool_stride_height - pad_top;
+ int dstart = d * pool_stride_depth - pad_front;
+ int wend = std::min(wstart + pool_size_width, w_src + pad_right);
+ int hend = std::min(hstart + pool_size_height, h_src + pad_bottom);
+ int dend = std::min(dstart + pool_size_depth, d_src + pad_back);
+
+ // this may not be equal to pool_w * pool_h * pool_d because of
+ // DimensionRoundingType choice (CEIL)
+ int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
+
+ // limit [start, end) to [0, w_src)
+ wstart = std::max(wstart, 0);
+ hstart = std::max(hstart, 0);
+ dstart = std::max(dstart, 0);
+ wend = std::min(wend, w_src);
+ hend = std::min(hend, h_src);
+ dend = std::min(dend, d_src);
+
+ auto max_val = -std::numeric_limits<T>::infinity();
+ int max_index{ 0 };
+ T avg_val = static_cast<T>(0.f);
+ T l2_val = static_cast<T>(0.f);
+
+ if(exclude_padding)
+ {
+ pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
+ }
+
+ for(int z = dstart; z < dend; ++z)
+ {
+ const int depth_offset_src = z * depth_stride_src;
+ for(int y = hstart; y < hend; ++y)
+ {
+ const int height_offset_src = y * height_stride_src;
+ for(int x = wstart; x < wend; ++x)
+ {
+ const auto val = static_cast<T>(
+ src[batch_offset_src + depth_offset_src + height_offset_src + x * num_channels + c]);
+ if(val > max_val)
+ {
+ max_val = val;
+ max_index = coord2index(src.shape(), Coordinates(c, x, y, z, 0));
+ }
+
+ avg_val += val;
+ l2_val += val * val;
+ }
+ }
+ }
+
+ avg_val /= pool_size;
+ l2_val = static_cast<T>(std::sqrt(l2_val / pool_size));
+
+ int dst_index = batch_offset_dst + depth_offset_dst + height_offset_dst + w * num_channels + c;
+ switch(pool3d_info.pool_type)
+ {
+ case PoolingType::MAX:
+ dst[dst_index] = static_cast<T>(max_val);
+ break;
+ case PoolingType::AVG:
+ dst[dst_index] = static_cast<T>(avg_val);
+ break;
+ case PoolingType::L2:
+ dst[dst_index] = static_cast<T>(l2_val);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Pooling Type should be either MAX, AVG or L2");
+ }
+
+ if(indices != nullptr)
+ {
+ (*indices)[dst_index] = max_index;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return dst;
+}
+
+template SimpleTensor<float> pooling_3d_layer(const SimpleTensor<float> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices);
+template SimpleTensor<half> pooling_3d_layer(const SimpleTensor<half> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices);
+
+template <typename T>
+SimpleTensor<T> pooling_3d_layer(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices)
+{
+ ARM_COMPUTE_UNUSED(output_qinfo);
+ return pooling_3d_layer_internal<T>(src, pool3d_info, indices);
+}
+
+template <>
+SimpleTensor<int8_t> pooling_3d_layer<int8_t>(const SimpleTensor<int8_t> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices)
+{
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp = pooling_3d_layer_internal<float>(src_tmp, pool3d_info, indices);
+ return convert_to_asymmetric<int8_t>(dst_tmp, output_qinfo);
+}
+
+template <>
+SimpleTensor<uint8_t> pooling_3d_layer<uint8_t>(const SimpleTensor<uint8_t> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices)
+{
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp = pooling_3d_layer_internal<float>(src_tmp, pool3d_info, indices);
+ return convert_to_asymmetric<uint8_t>(dst_tmp, output_qinfo);
+}
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
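
In the 3-D pooling reference above, the divisor for AVG/L2 pooling is computed from the window extents before the start coordinates are clamped, and is recomputed from the clamped extents when exclude_padding is set. A one-axis sketch of how the two divisors can differ (standalone and illustrative; the real code multiplies the three per-axis counts):

#include <algorithm>
#include <cassert>

int avg_divisor_1d(int wstart, int pool_size, int w_src, int pad_right, bool exclude_padding)
{
    int wend  = std::min(wstart + pool_size, w_src + pad_right); // may reach into the padding
    int count = wend - wstart;                                   // padded element count
    wstart    = std::max(wstart, 0);
    wend      = std::min(wend, w_src);
    if(exclude_padding)
    {
        count = wend - wstart; // only the elements inside the source volume
    }
    return count;
}

int main()
{
    // Window of size 3 starting one element into the left padding, source width 8, right padding 1.
    assert(avg_divisor_1d(-1, 3, 8, 1, false) == 3);
    assert(avg_divisor_1d(-1, 3, 8, 1, true) == 2);
    return 0;
}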
diff --git a/tests/validation/reference/Pooling3dLayer.h b/tests/validation/reference/Pooling3dLayer.h
new file mode 100644
index 0000000000..481a0d3024
--- /dev/null
+++ b/tests/validation/reference/Pooling3dLayer.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_POOL3D_LAYER_H
+#define ARM_COMPUTE_TEST_POOL3D_LAYER_H
+
+#include "Utils.h"
+#include "arm_compute/core/Types.h"
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> pooling_3d_layer_internal(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, SimpleTensor<uint32_t> *indices = nullptr);
+
+template <typename T>
+SimpleTensor<T> pooling_3d_layer(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo = QuantizationInfo(),
+ SimpleTensor<uint32_t> *indices = nullptr);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_POOL3D_LAYER_H */
diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp
index 5f4edfe49c..bf7bd0c1df 100644
--- a/tests/validation/reference/PoolingLayer.cpp
+++ b/tests/validation/reference/PoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,6 @@ using namespace arm_compute::misc::shape_calculator;
template <typename T, typename ACC_T, typename std::enable_if<is_floating_point<T>::value, int>::type>
SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const PoolingLayerInfo &info, SimpleTensor<uint32_t> *indices, DataLayout data_layout)
{
- ARM_COMPUTE_ERROR_ON(info.is_global_pooling && (src.shape().x() != src.shape().y()));
// Create reference
SimpleTensor<T> dst{ compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info), src.data_type(), 1 };
auto pooled_shape = compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info);
@@ -84,20 +83,28 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling
{
int wstart = w * pool_stride_x - pad_left;
int hstart = h * pool_stride_y - pad_top;
+
+ // Used to calculate kernel indices
+ int kh_start = std::max(0, -hstart);
+ int kw_start = std::max(0, -wstart);
+ int max_ker_index{ 0 };
+
int wend = std::min(wstart + pool_size_x, w_src);
int hend = std::min(hstart + pool_size_y, h_src);
wstart = std::max(wstart, 0);
hstart = std::max(hstart, 0);
- auto max_val = std::numeric_limits<ACC_T>::lowest();
+ auto max_val = info.use_inf_as_limit ? -std::numeric_limits<ACC_T>::infinity() : std::numeric_limits<ACC_T>::lowest();
int max_index{ 0 };
- for(int y = hstart; y < hend; ++y)
+
+ for(int y = hstart, kh = kh_start; y < hend; ++y, ++kh)
{
- for(int x = wstart; x < wend; ++x)
+ for(int x = wstart, kw = kw_start; x < wend; ++x, ++kw)
{
const auto val = static_cast<ACC_T>(src[b * z_src * h_src * w_src + r * h_src * w_src + y * w_src + x]);
if(val > max_val)
{
- max_val = val;
+ max_val = val;
+ max_ker_index = pool_size_x * (kh) + (kw);
if(data_layout == DataLayout::NCHW)
{
max_index = coord2index(src.shape(), Coordinates(x, y, r, 0));
@@ -113,7 +120,7 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling
dst[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = static_cast<T>(max_val);
if(indices)
{
- (*indices)[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = max_index;
+ (*indices)[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = (info.use_kernel_indices) ? max_ker_index : max_index;
}
}
}
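
The pooling change above lets max pooling report either a flat index into the source tensor or, with use_kernel_indices, an index relative to the pooling window (kh * pool_width + kw). A tiny sketch contrasting the two numbering schemes for one window position (illustrative helper names):

#include <cassert>

int kernel_relative_index(int kh, int kw, int pool_width)
{
    return kh * pool_width + kw;
}

int flat_index_nchw(int x, int y, int width)
{
    return y * width + x; // single channel / single batch kept for brevity
}

int main()
{
    // 3x3 window whose maximum sits at window position (kh = 2, kw = 1),
    // corresponding to tensor position (x = 4, y = 5) in a 16-wide feature map.
    assert(kernel_relative_index(2, 1, 3) == 7);
    assert(flat_index_nchw(4, 5, 16) == 84);
    return 0;
}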
diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp
index 27665375c3..ad7ba7ac43 100644
--- a/tests/validation/reference/QuantizationLayer.cpp
+++ b/tests/validation/reference/QuantizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index ffb79f86c5..c189bc2d47 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,6 @@
* SOFTWARE.
*/
#include "ReductionOperation.h"
-
#include "tests/validation/Helpers.h"
#include <algorithm>
@@ -39,7 +38,7 @@ namespace reference
namespace
{
template <typename T, typename OT>
-OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride)
+OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride, RoundingPolicy policy)
{
using type = typename std::remove_cv<OT>::type;
T res;
@@ -99,7 +98,14 @@ OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, in
}
if(op == ReductionOperation::MEAN_SUM && reduce_elements > 0)
{
- int_res /= reduce_elements;
+ // Only use rounding on aarch64 to be consistent with the kernel
+#ifdef __aarch64__
+ // Divide in float format, then round to nearest and implicitly cast back to int
+ int_res = round(static_cast<float>(int_res) / static_cast<float>(reduce_elements), policy);
+#else // defined(__aarch64__)
+ ARM_COMPUTE_UNUSED(policy);
+ int_res /= reduce_elements; // Legacy compatibility
+#endif // __aarch64__
}
res = static_cast<type>(int_res);
}
@@ -175,12 +181,12 @@ OT reduce_operation_arg_min_max(const T *ptr, int reduce_elements, ReductionOper
} // namespace
template <typename T, typename OT>
-SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op)
+SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+ DataType output_type, RoundingPolicy policy)
{
// Create reference
- const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
- DataType output_data_type = is_arg_min_max ? DataType::S32 : src.data_type();
- SimpleTensor<OT> dst{ dst_shape, output_data_type, 1, src.quantization_info() };
+ const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
+ SimpleTensor<OT> dst{ dst_shape, output_type, 1, src.quantization_info() };
const unsigned int src_width = src.shape().x();
const unsigned int src_height = src.shape().y();
const unsigned int src_depth = src.shape().z();
@@ -197,7 +203,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
const T *src_row_ptr = src.data() + du * reduce_elems;
dst[du] = is_arg_min_max ?
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, 1) :
- reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1);
+ reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1, policy);
}
}
break;
@@ -213,7 +219,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
const T *src_row_ptr = src.data() + in_offset;
dst[out_offset] = is_arg_min_max ?
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width) :
- reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width);
+ reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width, policy);
}
}
}
@@ -232,7 +238,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
const T *src_row_ptr = src.data() + in_offset;
dst[out_offset] = is_arg_min_max ?
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height) :
- reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height);
+ reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height, policy);
}
}
}
@@ -254,7 +260,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
const T *src_row_ptr = src.data() + in_offset;
dst[out_offset] = is_arg_min_max ?
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth) :
- reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth);
+ reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth, policy);
}
}
}
@@ -269,74 +275,89 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
}
template <typename T, typename OT>
-SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output)
+SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+ DataType output_type, QuantizationInfo quantization_info_output, RoundingPolicy policy)
{
ARM_COMPUTE_UNUSED(quantization_info_output);
- return compute_reduction_operation<T, OT>(src, dst_shape, axis, op);
+ return compute_reduction_operation<T, OT>(src, dst_shape, axis, op, output_type, policy);
}
template <>
-SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output)
+SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+ DataType output_type, QuantizationInfo quantization_info_output, RoundingPolicy policy)
{
if(src.data_type() == DataType::QASYMM8)
{
// If the operation is MEAN_SUM, we can directly use the uint8 implementation without taking into account scale and offset
if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output)
{
- return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op);
+ return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, output_type, policy);
}
else
{
SimpleTensor<float> src_f = convert_from_asymmetric(src);
- SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op);
+ SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op, output_type);
return convert_to_asymmetric<uint8_t>(dst_f, quantization_info_output);
}
}
else
{
- return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op);
+ return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, output_type, policy);
}
}
template <>
-SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output)
+SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis,
+ ReductionOperation op, DataType output_type, QuantizationInfo quantization_info_output, RoundingPolicy policy)
{
if(src.data_type() == DataType::QASYMM8_SIGNED)
{
// If the operation is MEAN_SUM, we can directly use the int8 implementation without taking into account scale and offset
if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output)
{
- return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op);
+ return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, output_type, policy);
}
else
{
SimpleTensor<float> src_f = convert_from_asymmetric(src);
- SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op);
+ SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op, output_type);
return convert_to_asymmetric<int8_t>(dst_f, quantization_info_output);
}
}
else
{
- return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op);
+ return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, output_type, policy);
}
}
template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32, QuantizationInfo quantization_info_output = QuantizationInfo(),
+ RoundingPolicy policy = RoundingPolicy::TO_ZERO);
+
template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
+
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
+template SimpleTensor<int64_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+ DataType output_type = DataType::S32, QuantizationInfo quantization_info_output = QuantizationInfo(),
+ RoundingPolicy policy = RoundingPolicy::TO_ZERO);
} // namespace reference
} // namespace validation
} // namespace test
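
The MEAN_SUM change above makes the integer path divide in float and round with the supplied RoundingPolicy on aarch64, while other targets keep the old truncating integer division. A standalone sketch of how the two paths can differ by one (plain nearest rounding stands in for the RoundingPolicy plumbing):

#include <cassert>
#include <cmath>

int mean_truncated(int sum, int n)
{
    return sum / n; // legacy behaviour: truncate towards zero
}

int mean_rounded(int sum, int n)
{
    return static_cast<int>(std::lround(static_cast<float>(sum) / static_cast<float>(n)));
}

int main()
{
    // Reducing 4 elements whose sum is 7: truncation yields 1, rounding yields 2.
    assert(mean_truncated(7, 4) == 1);
    assert(mean_rounded(7, 4) == 2);
    return 0;
}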
diff --git a/tests/validation/reference/ReductionOperation.h b/tests/validation/reference/ReductionOperation.h
index 9c9e721b29..fb2e7a7093 100644
--- a/tests/validation/reference/ReductionOperation.h
+++ b/tests/validation/reference/ReductionOperation.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_TEST_REDUCTION_OPERATION_H
#define ARM_COMPUTE_TEST_REDUCTION_OPERATION_H
+#include "arm_compute/core/Rounding.h"
#include "tests/SimpleTensor.h"
#include "tests/validation/Helpers.h"
@@ -36,8 +37,8 @@ namespace validation
namespace reference
{
template <typename T, typename OT>
-SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/Remap.cpp b/tests/validation/reference/Remap.cpp
deleted file mode 100644
index 33c5a7de68..0000000000
--- a/tests/validation/reference/Remap.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "Remap.h"
-
-#include "Utils.h"
-#include "tests/validation/Helpers.h"
-
-#include <algorithm>
-#include <array>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace reference
-{
-template <typename T>
-SimpleTensor<T> remap(const SimpleTensor<T> &in, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<T> &valid_mask, InterpolationPolicy policy, BorderMode border_mode,
- T constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON_MSG(border_mode == BorderMode::REPLICATE, "BorderMode not supported");
- SimpleTensor<T> out(in.shape(), in.data_type());
- ARM_COMPUTE_ERROR_ON(out.num_elements() != map_x.num_elements());
- const int width = in.shape().x();
- const int height = in.shape().y();
- const uint32_t num_elements = out.num_elements();
- for(uint32_t idx = 0; idx < num_elements; idx++)
- {
- const Coordinates id_out = index2coord(out.shape(), idx);
- valid_mask[idx] = 1;
- Coordinates src_idx = id_out; // need to setup all coordinates and not just xy
- if((0 <= map_y[idx]) && (map_y[idx] < height) && (0 <= map_x[idx]) && (map_x[idx] < width))
- {
- switch(policy)
- {
- case InterpolationPolicy::NEAREST_NEIGHBOR:
- {
- src_idx.set(0, static_cast<int>(std::floor(map_x[idx])));
- src_idx.set(1, static_cast<int>(std::floor(map_y[idx])));
- out[idx] = in[coord2index(in.shape(), src_idx)];
- break;
- }
- case InterpolationPolicy::BILINEAR:
- {
- (valid_bilinear_policy(map_x[idx], map_y[idx], width, height, border_mode)) ?
- out[idx] = bilinear_policy(in, src_idx, map_x[idx], map_y[idx], border_mode, constant_border_value) :
- valid_mask[idx] = 0;
- break;
- }
- case InterpolationPolicy::AREA:
- default:
- ARM_COMPUTE_ERROR("Interpolation not supported");
- break;
- }
- }
- else
- {
- if(border_mode == BorderMode::UNDEFINED)
- {
- valid_mask[idx] = 0;
- }
- else
- {
- switch(policy)
- {
- case InterpolationPolicy::NEAREST_NEIGHBOR:
- out[idx] = constant_border_value;
- break;
- case InterpolationPolicy::BILINEAR:
- out[idx] = bilinear_policy(in, src_idx, map_x[idx], map_y[idx], border_mode, constant_border_value);
- break;
- case InterpolationPolicy::AREA:
- default:
- break;
- }
- }
- }
- }
-
- return out;
-}
-
-template SimpleTensor<uint8_t> remap(const SimpleTensor<uint8_t> &src, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<uint8_t> &valid_mask, InterpolationPolicy policy,
- BorderMode border_mode,
- uint8_t constant_border_value);
-} // namespace reference
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/reference/Reorder.cpp b/tests/validation/reference/Reorder.cpp
new file mode 100644
index 0000000000..8abb372596
--- /dev/null
+++ b/tests/validation/reference/Reorder.cpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Reorder.h"
+#include "src/core/NEON/kernels/arm_gemm/utils.hpp"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+
+/*
+ * Generic transform.
+ *
+ * Assuming the untransposed case, this works by first reading <BlockBy>
+ * consecutive values from the first input row. The same number of values
+ * is then read from the next <IntBy-1> rows. Now return to the first
+ * input row and repeat.
+ *
+ * Need to cope with the work requested in either dimension not actually
+ * being a multiple of the block sizes.
+ */
+template <unsigned int tIntBy, unsigned int BlockBy, bool Transposed, size_t TOutSize, size_t TInSize, typename d_type, arm_gemm::VLType vlt>
+struct Transform_ref
+{
+ template <typename TOut, typename TIn>
+ static void Transform(TOut &out, const TIn in, const int stride,
+ const int y0, const int ymax, const int x0, const int xmax)
+ {
+ // NOTE: This code is disabled to avoid the call to get_vector_length(), so templated transforms will not be
+ // correct for SVE. This is not an issue as we have specializations for all SVE cases.
+ // For SVE cases we multiply the interleave factor by the vector length.
+ // const unsigned int IntBy = tIntBy * (vlt == VLType::SVE ? get_vector_length<TOut>() / BlockBy : 1);
+ const unsigned int IntBy = tIntBy;
+ int out_index = 0;
+
+ const int n_whole_y_blocks = (ymax - y0) / IntBy;
+ const int y_remainders = (ymax - y0) % IntBy;
+ const int n_y_blocks = n_whole_y_blocks + (y_remainders ? 1 : 0);
+
+ const int n_whole_x_blocks = (xmax - x0) / BlockBy;
+ const int x_remainders = (xmax - x0) % BlockBy;
+ const int n_x_blocks = n_whole_x_blocks + (x_remainders ? 1 : 0);
+
+ // "Y" loop: advance down the rows of the source IntBy rows at a time.
+ // Set up fill_rows to show the number of rows to copy from, and blank_rows
+ // for the number of blank rows to add.
+ for(int y_block = 0; y_block < n_y_blocks; y_block++)
+ {
+ const int fill_rows = (y_block < n_whole_y_blocks) ? IntBy : y_remainders;
+ const int blank_rows = IntBy - fill_rows;
+
+ const int y_base = y0 + (y_block * IntBy);
+
+ // So now advance along this block of rows, BlockBy columns at a time.
+ for(int x_block = 0; x_block < n_x_blocks; x_block++)
+ {
+ const int fill_cols = (x_block < n_whole_x_blocks) ? BlockBy : x_remainders;
+ const int blank_cols = BlockBy - fill_cols;
+
+ const int x_base = x0 + (x_block * BlockBy);
+
+ for(int row = 0; row < fill_rows; row++)
+ {
+ for(int col = 0; col < fill_cols; col++)
+ {
+ // In-range copy. If it's transposed, we reverse the sense of rows and columns here.
+ if(Transposed)
+ {
+ out[out_index] = in[(x_base + col) * stride + y_base + row];
+ out_index++;
+ }
+ else
+ {
+ out[out_index] = in[(y_base + row) * stride + x_base + col];
+ out_index++;
+ }
+ }
+ // "col" tail - row is in range but column is out of range.
+ for(int col = 0; col < blank_cols; col++)
+ {
+ out[out_index] = 0;
+ out_index++;
+ }
+ }
+ // "row" tail - row is out of range so fill with zeros always.
+ const d_type zeroval = 0;
+ const int pads = blank_rows * (fill_cols + blank_cols);
+
+ for(int i = 0; i < pads; i++)
+ {
+ out[out_index + i] = zeroval;
+ }
+
+ out_index += pads;
+ }
+ }
+ }
+};
+
+template <typename T>
+SimpleTensor<T> reorder_layer(const SimpleTensor<T> &src, const TensorShape &output_shape, WeightFormat output_wf)
+{
+ SimpleTensor<T> dst{ output_shape, src.data_type() };
+ const int cols = src.shape()[0];
+ const int rows = src.shape()[1];
+
+ switch(output_wf)
+ {
+ case WeightFormat::OHWIo4:
+ {
+ Transform_ref<4, 1, true, sizeof(float), sizeof(float), float, arm_gemm::VLType::None>::Transform<SimpleTensor<T> &, SimpleTensor<T>>(dst, src, rows, 0, rows, 0, cols);
+ break;
+ }
+ case WeightFormat::OHWIo8:
+ {
+ Transform_ref<8, 1, true, sizeof(float), sizeof(float), float, arm_gemm::VLType::None>::Transform<SimpleTensor<T> &, SimpleTensor<T>>(dst, src, rows, 0, rows, 0, cols);
+ break;
+ }
+ default:
+ break;
+ }
+
+ return dst;
+}
+
+template SimpleTensor<float> reorder_layer(const SimpleTensor<float> &src, const TensorShape &output_shape, WeightFormat output_wf);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
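
A minimal standalone sketch of the blocked interleave that Transform_ref performs may help when reading the new reference. The helper below is hypothetical (it is not the library code) and reproduces only the transposed IntBy = 4, BlockBy = 1 case selected by the OHWIo4 path above, including the zero-padded row tail:

// Hypothetical standalone sketch of the transposed block interleave used by
// the reorder reference for OHWIo4 (IntBy = 4, BlockBy = 1). Reads the source
// column by column in groups of 4 rows and zero-fills past the last row.
#include <cstdio>
#include <vector>

static std::vector<float> interleave4_transposed(const std::vector<float> &in, int rows, int cols)
{
    const int int_by = 4;
    std::vector<float> out;
    for(int y0 = 0; y0 < rows; y0 += int_by) // advance 4 output rows at a time
    {
        for(int x = 0; x < cols; x++) // one column per step, since BlockBy == 1
        {
            for(int r = 0; r < int_by; r++)
            {
                const int y = y0 + r;
                // Transposed read: in[x * stride + y] with stride == rows; zero-fill the tail block.
                out.push_back((y < rows) ? in[x * rows + y] : 0.f);
            }
        }
    }
    return out;
}

int main()
{
    // A 5 (rows) x 3 (cols) source laid out with stride == rows, as reorder_layer passes it.
    std::vector<float> src(5 * 3);
    for(size_t i = 0; i < src.size(); ++i)
    {
        src[i] = static_cast<float>(i);
    }
    const std::vector<float> dst = interleave4_transposed(src, 5, 3);
    for(size_t i = 0; i < dst.size(); ++i)
    {
        std::printf("%s%2.0f", (i != 0 && i % 4 == 0) ? "\n" : " ", dst[i]);
    }
    std::printf("\n");
    return 0;
}
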
diff --git a/tests/validation/reference/Remap.h b/tests/validation/reference/Reorder.h
index 0726f75965..94ee5078f8 100644
--- a/tests/validation/reference/Remap.h
+++ b/tests/validation/reference/Reorder.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,10 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_REMAP_H
-#define ARM_COMPUTE_TEST_REMAP_H
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_REORDER
+#define ACL_TESTS_VALIDATION_REFERENCE_REORDER
#include "tests/SimpleTensor.h"
+#include "tests/Types.h"
namespace arm_compute
{
@@ -35,10 +36,9 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> remap(const SimpleTensor<T> &in, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<T> &valid_mask, InterpolationPolicy policy, BorderMode border_mode,
- T constant_border_value = 0);
+SimpleTensor<T> reorder_layer(const SimpleTensor<T> &src, const TensorShape &output_shape, WeightFormat output_wf);
} // namespace reference
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_REMAP_H */
+#endif /* ACL_TESTS_VALIDATION_REFERENCE_REORDER */
diff --git a/tests/validation/reference/ReshapeLayer.cpp b/tests/validation/reference/ReshapeLayer.cpp
index daea001be6..30a58dd65b 100644
--- a/tests/validation/reference/ReshapeLayer.cpp
+++ b/tests/validation/reference/ReshapeLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Arm Limited.
+ * Copyright (c) 2017,2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,14 +44,15 @@ SimpleTensor<T> reshape_layer(const SimpleTensor<T> &src, const TensorShape &out
return dst;
}
-template SimpleTensor<uint8_t> reshape_layer(const SimpleTensor<uint8_t> &src, const TensorShape &output_shape);
-template SimpleTensor<int8_t> reshape_layer(const SimpleTensor<int8_t> &src, const TensorShape &output_shape);
+template SimpleTensor<uint8_t> reshape_layer(const SimpleTensor<uint8_t> &src, const TensorShape &output_shape);
+template SimpleTensor<int8_t> reshape_layer(const SimpleTensor<int8_t> &src, const TensorShape &output_shape);
template SimpleTensor<uint16_t> reshape_layer(const SimpleTensor<uint16_t> &src, const TensorShape &output_shape);
-template SimpleTensor<int16_t> reshape_layer(const SimpleTensor<int16_t> &src, const TensorShape &output_shape);
+template SimpleTensor<int16_t> reshape_layer(const SimpleTensor<int16_t> &src, const TensorShape &output_shape);
template SimpleTensor<uint32_t> reshape_layer(const SimpleTensor<uint32_t> &src, const TensorShape &output_shape);
-template SimpleTensor<int32_t> reshape_layer(const SimpleTensor<int32_t> &src, const TensorShape &output_shape);
-template SimpleTensor<half> reshape_layer(const SimpleTensor<half> &src, const TensorShape &output_shape);
-template SimpleTensor<float> reshape_layer(const SimpleTensor<float> &src, const TensorShape &output_shape);
+template SimpleTensor<int32_t> reshape_layer(const SimpleTensor<int32_t> &src, const TensorShape &output_shape);
+template SimpleTensor<half> reshape_layer(const SimpleTensor<half> &src, const TensorShape &output_shape);
+template SimpleTensor<float> reshape_layer(const SimpleTensor<float> &src, const TensorShape &output_shape);
+template SimpleTensor<bfloat16> reshape_layer(const SimpleTensor<bfloat16> &src, const TensorShape &output_shape);
/** [ReshapeLayer] **/
} // namespace reference
} // namespace validation
diff --git a/tests/validation/reference/Reverse.cpp b/tests/validation/reference/Reverse.cpp
index c6c4614278..7924f900d1 100644
--- a/tests/validation/reference/Reverse.cpp
+++ b/tests/validation/reference/Reverse.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,8 +35,9 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &axis)
+SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis)
{
+ ARM_COMPUTE_ERROR_ON(src.shape().num_dimensions() > 4);
ARM_COMPUTE_ERROR_ON(axis.shape().num_dimensions() > 1);
ARM_COMPUTE_ERROR_ON(axis.shape().x() > 4);
@@ -48,10 +49,32 @@ SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t>
const unsigned int depth = src.shape()[2];
const unsigned int batches = src.shape()[3];
+ const int rank = src.shape().num_dimensions();
+
std::array<bool, 4> to_reverse = { { false, false, false, false } };
for(int i = 0; i < axis.num_elements(); ++i)
{
- to_reverse[axis[i]] = true;
+ int axis_i = axis[i];
+
+ // The values of axis tensor must be between [-rank, rank-1].
+ if((axis_i < -rank) || (axis_i >= rank))
+ {
+ ARM_COMPUTE_ERROR("The values of the axis tensor must be within [-rank, rank-1].");
+ }
+
+ // In case of a negative axis value, i.e. targeted axis(i) = rank + axis(i)
+ if(axis_i < 0)
+ {
+ axis_i = rank + axis_i;
+ }
+
+ // Reverse ACL axis indices convention i.e. (inverted)axis = (tensor_rank - 1) - axis
+ if(use_inverted_axis)
+ {
+ axis_i = (rank - 1) - axis_i;
+ }
+
+ to_reverse[axis_i] = true;
}
const uint32_t num_elements = src.num_elements();
@@ -73,9 +96,9 @@ SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t>
return dst;
}
-template SimpleTensor<uint8_t> reverse(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint32_t> &axis);
-template SimpleTensor<half> reverse(const SimpleTensor<half> &src, const SimpleTensor<uint32_t> &axis);
-template SimpleTensor<float> reverse(const SimpleTensor<float> &src, const SimpleTensor<uint32_t> &axis);
+template SimpleTensor<uint8_t> reverse(const SimpleTensor<uint8_t> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis);
+template SimpleTensor<half> reverse(const SimpleTensor<half> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis);
+template SimpleTensor<float> reverse(const SimpleTensor<float> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis);
} // namespace reference
} // namespace validation
} // namespace test
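
The axis handling added to the reverse reference (wrap negative values into [0, rank), then optionally invert the index to the library's dimension ordering) can be checked in isolation. The helper below is a hypothetical sketch of that normalisation only, not the reference function:

// Hypothetical sketch of the axis normalisation used by the reverse reference.
#include <cstdio>
#include <stdexcept>

static int normalise_axis(int axis, int rank, bool use_inverted_axis)
{
    if(axis < -rank || axis >= rank)
    {
        throw std::out_of_range("axis must be within [-rank, rank-1]");
    }
    if(axis < 0)
    {
        axis += rank; // e.g. axis = -1 with rank = 4 becomes 3
    }
    if(use_inverted_axis)
    {
        axis = (rank - 1) - axis; // framework index order -> ACL index order
    }
    return axis;
}

int main()
{
    std::printf("%d\n", normalise_axis(-1, 4, false)); // 3
    std::printf("%d\n", normalise_axis(-1, 4, true));  // 0
    std::printf("%d\n", normalise_axis(2, 4, true));   // 1
    return 0;
}
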
diff --git a/tests/validation/reference/Reverse.h b/tests/validation/reference/Reverse.h
index 4a28da7270..30926b05a5 100644
--- a/tests/validation/reference/Reverse.h
+++ b/tests/validation/reference/Reverse.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_REVERSE_H
-#define ARM_COMPUTE_TEST_REVERSE_H
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_REVERSE_H
+#define ACL_TESTS_VALIDATION_REFERENCE_REVERSE_H
#include "tests/SimpleTensor.h"
@@ -35,9 +35,9 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &axis);
+SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis = false);
} // namespace reference
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_REVERSE_H */
+#endif // ACL_TESTS_VALIDATION_REFERENCE_REVERSE_H
diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp
index 71e98fd776..2f429cb29b 100644
--- a/tests/validation/reference/Scale.cpp
+++ b/tests/validation/reference/Scale.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,6 @@
#include "Scale.h"
#include "Utils.h"
-#include "arm_compute/core/utils/misc/Utility.h"
#include "src/core/utils/ScaleUtils.h"
#include "support/Rounding.h"
@@ -183,14 +182,15 @@ SimpleTensor<T> scale_core(const SimpleTensor<T> &in, float scale_x, float scale
template <typename T>
SimpleTensor<T> scale(const SimpleTensor<T> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners)
+ SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info)
{
+ ARM_COMPUTE_UNUSED(output_quantization_info);
return scale_core<T>(src, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy, ceil_policy_scale, align_corners);
}
template <>
SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners)
+ SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info)
{
SimpleTensor<uint8_t> dst;
if(src.quantization_info().uniform().scale != 0.f)
@@ -198,7 +198,7 @@ SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, flo
SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
float constant_border_value_f = dequantize_qasymm8(constant_border_value, src.quantization_info());
SimpleTensor<float> dst_tmp = scale_core<float>(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale, align_corners);
- dst = convert_to_asymmetric<uint8_t>(dst_tmp, src.quantization_info());
+ dst = convert_to_asymmetric<uint8_t>(dst_tmp, output_quantization_info);
}
else
{
@@ -209,7 +209,7 @@ SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, flo
template <>
SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int8_t constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners)
+ SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info)
{
SimpleTensor<int8_t> dst;
if(src.quantization_info().uniform().scale != 0.f)
@@ -217,7 +217,7 @@ SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float
SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
float constant_border_value_f = dequantize_qasymm8_signed(constant_border_value, src.quantization_info());
SimpleTensor<float> dst_tmp = scale_core<float>(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale, align_corners);
- dst = convert_to_asymmetric<int8_t>(dst_tmp, src.quantization_info());
+ dst = convert_to_asymmetric<int8_t>(dst_tmp, output_quantization_info);
}
else
{
@@ -227,11 +227,11 @@ SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float
}
template SimpleTensor<int16_t> scale(const SimpleTensor<int16_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int16_t constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners);
+ SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info);
template SimpleTensor<half> scale(const SimpleTensor<half> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, half constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners);
+ SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info);
template SimpleTensor<float> scale(const SimpleTensor<float> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, float constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners);
+ SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info);
} // namespace reference
} // namespace validation
} // namespace test
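
The QASYMM8 and QASYMM8_SIGNED specialisations above dequantize, scale in the float domain, then requantize using the new output_quantization_info rather than the input's quantization info. The round trip is the usual affine mapping; the sketch below uses arbitrary scale/offset values and hypothetical helpers rather than the library's quantization utilities:

// Hypothetical sketch of the dequantize -> float op -> requantize round trip
// used by the quantized scale specialisations. Scale/offset values are made up.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

static float dequantize(uint8_t q, float scale, int32_t offset)
{
    return (static_cast<int32_t>(q) - offset) * scale;
}

static uint8_t quantize(float v, float scale, int32_t offset)
{
    const int32_t q = static_cast<int32_t>(std::lround(v / scale)) + offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main()
{
    const float   in_scale   = 0.5f;  const int32_t in_offset  = 10; // input QuantizationInfo
    const float   out_scale  = 0.25f; const int32_t out_offset = 0;  // output QuantizationInfo
    const uint8_t q_in       = 42;
    const float   real       = dequantize(q_in, in_scale, in_offset); // 16.0f
    // ... the float-domain resize would happen here ...
    const uint8_t q_out      = quantize(real, out_scale, out_offset); // 64
    std::printf("real=%.2f q_out=%u\n", real, static_cast<unsigned>(q_out));
    return 0;
}
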
diff --git a/tests/validation/reference/Scale.h b/tests/validation/reference/Scale.h
index c66af8d94e..c32c07d1c0 100644
--- a/tests/validation/reference/Scale.h
+++ b/tests/validation/reference/Scale.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,7 +37,7 @@ namespace reference
{
template <typename T>
SimpleTensor<T> scale(const SimpleTensor<T> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0,
- SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool ceil_policy_scale = false, bool align_corners = false);
+ SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool ceil_policy_scale = false, bool align_corners = false, QuantizationInfo output_quantization_info = QuantizationInfo());
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/ScatterLayer.cpp b/tests/validation/reference/ScatterLayer.cpp
new file mode 100644
index 0000000000..55c48a9002
--- /dev/null
+++ b/tests/validation/reference/ScatterLayer.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ScatterLayer.h"
+#include "tests/validation/Helpers.h"
+#include "arm_compute/core/TensorShape.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+
+template <typename T>
+T reduce_op(const T &current, const T &update, const ScatterFunction func)
+{
+ switch(func)
+ {
+ case ScatterFunction::Update:
+ return update;
+ break;
+ case ScatterFunction::Add:
+ return current + update;
+ break;
+ case ScatterFunction::Sub:
+ return current - update;
+ break;
+ case ScatterFunction::Max:
+ return std::max(current, update);
+ break;
+ case ScatterFunction::Min:
+ return std::min(current, update);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported Scatter function");
+ break;
+ }
+}
+
+template float reduce_op(const float &current, const float &update, const ScatterFunction func);
+template half reduce_op(const half &current, const half &update, const ScatterFunction func);
+}
+
+// NOTE: This function expects collapsed tensors as input.
+// Batch dims for update/indices tensors should be collapsed into a single dim.
+// Data dims should be collapsed into a single dim for both update and src tensors prior to calling this function.
+template <typename T>
+SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info)
+{
+ // 1. If zero initialization variable is false, copy src data to dst.
+ SimpleTensor<T> dst{ out_shape, src.data_type(), 1 };
+ if(!info.zero_initialization)
+ {
+ std::copy_n(src.data(), src.num_elements(), dst.data());
+ }
+
+ // Number of elements between each value of the dim being iterated through
+ const unsigned int data_stride = updates.shape().total_size_lower(updates.shape().num_dimensions() - 1);
+ const unsigned int no_output_dims = out_shape.num_dimensions();
+
+ // Calculate output stride at given index for all output dims.
+ std::vector<unsigned int> out_stride_at_idx(no_output_dims);
+ for (unsigned int i = 0 ; i < no_output_dims; i++)
+ {
+ out_stride_at_idx[i] = out_shape.total_size_lower(i);
+ }
+
+ const unsigned int indices_x_dim = static_cast<unsigned int>(indices.shape()[0]);
+ const unsigned int indices_y_dim = static_cast<unsigned int>(indices.shape()[1]);
+
+ // 2. Iterate over indices tensor y-dim and replace sections of dst tensor with relevant areas of update tensor.
+ for(unsigned int i = 0; i < indices_y_dim; i++)
+ {
+ // NOTE: Currently, indices.shape() == [X, Y, 1, 1], where X is the indices dim and Y is the batch dim
+ // Starting index for both the update and indices tensors.
+ const unsigned int update_dim_start = i * data_stride;
+ const unsigned int indices_dim_start = i * indices_x_dim;
+ bool out_of_bounds = false;
+ unsigned int out_offset_acc = 0;
+
+ // Iterate over each indices value for the relevant batch and accumulate the offset.
+ for(unsigned int j = 0; j < indices_x_dim; j++)
+ {
+ // Get first index value with i * indices_x_dim (iterating through y-dim/batch idx), then iterate through x dim by adding j
+ const int index_value = indices[indices_dim_start + j];
+ const unsigned int out_dim = no_output_dims - (j+1); // Output dim corresponding to the current index value.
+ if(index_value < static_cast<int>(out_shape[out_dim]) && index_value >= 0)
+ {
+ out_offset_acc += (index_value * out_stride_at_idx[out_dim]); // offset accumulation
+ }
+ else
+ {
+ out_of_bounds = true;
+ break;
+ }
+ }
+
+ // If not out of bounds, copy update tensor elements to output
+ if(!out_of_bounds)
+ {
+ for (unsigned int j = 0 ; j < data_stride; j++)
+ {
+ dst[out_offset_acc + j] = reduce_op(dst[out_offset_acc + j], updates[update_dim_start + j], info.func);
+ }
+ }
+ }
+ return dst;
+}
+
+template <typename T>
+SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info)
+{
+ return scatter_layer_internal<T>(src, updates, indices, out_shape, info);
+}
+
+template SimpleTensor<float> scatter_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<half> scatter_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<int32_t> scatter_layer(const SimpleTensor<int32_t> &src, const SimpleTensor<int32_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<uint32_t> scatter_layer(const SimpleTensor<uint32_t> &src, const SimpleTensor<uint32_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<int16_t> scatter_layer(const SimpleTensor<int16_t> &src, const SimpleTensor<int16_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<uint16_t> scatter_layer(const SimpleTensor<uint16_t> &src, const SimpleTensor<uint16_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<int8_t> scatter_layer(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<uint8_t> scatter_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
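
The core of scatter_layer_internal is folding one index tuple into a linear output offset: index j addresses output dimension (rank - 1 - j) and is multiplied by that dimension's total_size_lower() stride. The sketch below re-implements just that mapping with hypothetical shapes so the arithmetic can be followed on paper:

// Hypothetical sketch of the index-tuple -> linear-offset mapping used by the
// scatter reference. shape[0] is the innermost (fastest) dimension.
#include <cstdio>
#include <vector>

static int offset_for(const std::vector<int> &index_tuple, const std::vector<int> &shape)
{
    // stride[i] = product of all dimensions below i, i.e. total_size_lower(i).
    std::vector<int> stride(shape.size(), 1);
    for(size_t i = 1; i < shape.size(); ++i)
    {
        stride[i] = stride[i - 1] * shape[i - 1];
    }
    const int rank   = static_cast<int>(shape.size());
    int       offset = 0;
    for(size_t j = 0; j < index_tuple.size(); ++j)
    {
        // Index j addresses output dim (rank - 1 - j), mirroring
        // out_dim = no_output_dims - (j + 1) in the reference above.
        offset += index_tuple[j] * stride[rank - 1 - static_cast<int>(j)];
    }
    return offset;
}

int main()
{
    const std::vector<int> out_shape = { 4, 3, 2 }; // x, y, z with x innermost
    const std::vector<int> idx       = { 1, 2 };    // addresses dim z, then dim y
    std::printf("offset = %d\n", offset_for(idx, out_shape)); // 1*12 + 2*4 = 20
    return 0;
}
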
diff --git a/tests/validation/reference/ScatterLayer.h b/tests/validation/reference/ScatterLayer.h
new file mode 100644
index 0000000000..97d5e70b0d
--- /dev/null
+++ b/tests/validation/reference/ScatterLayer.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_SCATTERLAYER_H
+#define ACL_TESTS_VALIDATION_REFERENCE_SCATTERLAYER_H
+
+#include "Utils.h"
+#include "arm_compute/function_info/ScatterInfo.h"
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &update, const SimpleTensor<int32_t> &indices, const TensorShape &shape, const ScatterInfo &info);
+
+template <typename T>
+SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &update, const SimpleTensor<int32_t> &indices, const TensorShape &shape, const ScatterInfo &info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_REFERENCE_SCATTERLAYER_H
diff --git a/tests/validation/reference/UtilsQuantizedAsymm.h b/tests/validation/reference/UtilsQuantizedAsymm.h
index 1f593bb696..e5ecc66545 100644
--- a/tests/validation/reference/UtilsQuantizedAsymm.h
+++ b/tests/validation/reference/UtilsQuantizedAsymm.h
@@ -32,6 +32,22 @@ namespace test
{
namespace validation
{
+namespace
+{
+#if __clang__
+// This has been tested on clang 7.0.2 (__clang_major__ == 7 && __clang_minor__ == 0 && __clang_patchlevel__ == 2)
+inline int64_t to_int64(int32_t val)
+{
+ return static_cast<int64_t>(val) | ((val < 0) ? (((1ll << 32) - 1) << 32) : 0);
+}
+#else // __clang__
+inline int64_t to_int64(int32_t val)
+{
+ return static_cast<int64_t>(val);
+}
+#endif // __clang__
+} // namespace
+
/** Rounded to nearest division by a power-of-two. */
inline int32_t asymm_rounding_divide_by_pow2(int32_t x, int exponent)
{
@@ -43,12 +59,12 @@ inline int32_t asymm_rounding_divide_by_pow2(int32_t x, int exponent)
/** Multiplication of two integers. The same as ARMv7 Arm® Neon™ VQRDMULH instruction. */
inline int32_t asymm_int_mult(int32_t a, int32_t b)
{
- bool overflow = a == b && a == std::numeric_limits<int32_t>::min();
- int64_t a_64(a);
- int64_t b_64(b);
- int64_t ab_64 = a_64 * b_64;
- int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
- int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
+ const bool overflow = a == b && a == std::numeric_limits<int32_t>::min();
+ const int64_t a_64 = to_int64(a);
+ const int64_t b_64 = to_int64(b);
+ const int64_t ab_64 = a_64 * b_64;
+ const int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
+ const int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
}
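
For reference, the rewritten asymm_int_mult is the usual saturating rounding doubling high multiply (the behaviour of VQRDMULH). The standalone sketch below mirrors that arithmetic so the nudge and the single saturating case can be checked outside the test framework; it is an illustrative re-derivation, not the library code:

// Hypothetical sketch of a saturating rounding doubling high multiply,
// mirroring asymm_int_mult above. Only INT32_MIN * INT32_MIN saturates.
#include <cstdint>
#include <cstdio>
#include <limits>

static int32_t rounding_doubling_high_mul(int32_t a, int32_t b)
{
    const bool    overflow = (a == b) && (a == std::numeric_limits<int32_t>::min());
    const int64_t ab       = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    const int32_t nudge    = (ab >= 0) ? (1 << 30) : (1 - (1 << 30));
    const int32_t high     = static_cast<int32_t>((ab + nudge) / (1ll << 31));
    return overflow ? std::numeric_limits<int32_t>::max() : high;
}

int main()
{
    // (1 << 30) represents 0.5 in Q0.31, so 0.5 * 0.5 comes back as 0.25, i.e. 1 << 29.
    std::printf("%d\n", rounding_doubling_high_mul(1 << 30, 1 << 30));      // 536870912
    std::printf("%d\n", rounding_doubling_high_mul(INT32_MIN, INT32_MIN));  // saturates to INT32_MAX
    return 0;
}
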