//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ConcatTestImpl.hpp"

#include <armnnUtils/QuantizeHelper.hpp>
#include <ResolveType.hpp>

#include <armnnUtils/Permute.hpp>

#include <armnnTestUtils/TensorCopyUtils.hpp>
#include <armnnTestUtils/WorkloadTestUtils.hpp>

#include <armnnTestUtils/TensorHelpers.hpp>

using namespace armnn;
using namespace armnnUtils;

//
// Helper functions and templates
//

OriginsDescriptor CreateDescriptorForConcat(
    const std::vector<TensorInfo> & inputTensorInfos,
    unsigned int concatDim)
{
    std::vector<TensorShape> shapes;
    shapes.reserve(inputTensorInfos.size());
    for (const TensorInfo& it: inputTensorInfos)
    {
        shapes.push_back(it.GetShape());
    }

    return CreateDescriptorForConcatenation(shapes.begin(), shapes.end(), concatDim);
}

//
// Concat is only supported for N and C dimensions for NCHW and the innermost dimension.
// In case of <4 dimensions we need to make sure that the concat dimension is at least
// the 3rd slowest iterating one or the innermost dimension.
//

bool NeedPermuteForConcat(
    const std::vector<TensorInfo> & inputTensorInfos,
    unsigned int concatDim)
{
    // See note above. Additionally we expect the input shapes to have the
    // same number of dimensions.
    unsigned int nDimensions = 0;

    // Determine the number of dimensions as well as sanity check them
    // against test implementation issues.
    for (auto && tensorInfo : inputTensorInfos)
    {
        if (!nDimensions)
        {
            nDimensions = tensorInfo.GetShape().GetNumDimensions();
        }
        else
        {
            ARMNN_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(),
                "Input shapes must have the same number of dimensions");
        }
    }

    return (nDimensions < 3 ||
            (nDimensions == 3 && (nDimensions - concatDim) < 3 && (nDimensions - concatDim) != 1));
}

TensorShape ExpandTensorShapeTo3dForPermute(const TensorShape & inputShape)
{
    unsigned int numDims = inputShape.GetNumDimensions();
    if (numDims >= 3)
    {
        // Nothing to do if the inputShape has at least 3 dimensions.
        return inputShape;
    }

    std::vector<unsigned int> newDims(size_t(3), 1u);
    unsigned int expandedBy = 3 - numDims;
    for (unsigned int i = 0; i < numDims; ++i)
    {
        newDims[expandedBy + i] = inputShape[i];
    }
    return TensorShape(3u, &newDims[0]);
}

void Generate3dPermuteVectorForConcat(
    unsigned int numDimensions,
    unsigned int & concatDim,
    std::pair<PermutationVector, PermutationVector> & permutations)
{
    ARMNN_ASSERT_MSG(numDimensions <= 3,
        "Only dimensions 1,2 and 3 are supported by this helper");
    unsigned int expandedBy = 3 - numDimensions;
    unsigned int expandedConcatAxis = concatDim + expandedBy;

    if (expandedConcatAxis == 2)
    {
        concatDim = 0;
        PermutationVector forwardPermutation({1, 2, 0});
        PermutationVector reversePermutation({2, 0, 1});
        permutations = std::make_pair(forwardPermutation, reversePermutation);
    }
    else if (expandedConcatAxis == 1)
    {
        concatDim = 0;
        PermutationVector forwardPermutation({2, 0, 1});
        PermutationVector reversePermutation({1, 2, 0});
        permutations = std::make_pair(forwardPermutation, reversePermutation);
    }
    else
    {
        ARMNN_ASSERT(expandedConcatAxis == 0);
        concatDim = 0;
    }
}
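// Worked example for the helpers above (editorial illustration; the values follow
// directly from the code rather than from a test run): concatenating two { 2, 3 }
// tensors along axis 1.
//
//   NeedPermuteForConcat({ {2,3}, {2,3} }, 1)  -> true  (rank 2 < 3)
//   ExpandTensorShapeTo3dForPermute({ 2, 3 })  -> { 1, 2, 3 }
//   Generate3dPermuteVectorForConcat(2, concatDim /* 1 */, permutations)
//       -> concatDim = 0
//          permutations.first  = { 1, 2, 0 }   (forward, applied before the concat)
//          permutations.second = { 2, 0, 1 }   (reverse, applied to the result)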
template<typename T> void PermuteTensorData(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const PermutationVector& mappings,
    TensorInfo & inputTensorInfo,
    const T * inputData,
    std::vector<T>& outputData)
{
    IgnoreUnused(memoryManager);
    ARMNN_ASSERT_MSG(inputData != nullptr, "inputData must not be null");
    if (inputData == nullptr)
    {
        // Nullptr is an error in the test. By returning without doing the permutation
        // I expect the caller to fail the test. It still makes sense to report this as
        // an assert for Debug builds.
        return;
    }

    TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings);
    std::unique_ptr<ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    PermuteQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = PermuteDescriptor{mappings};
    WorkloadInfo workloadInfo;
    AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());

    std::unique_ptr<IWorkload> workload
        = workloadFactory.CreateWorkload(LayerType::Permute, queueDescriptor, workloadInfo);

    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData);

    workload->PostAllocationConfigure();
    workload->Execute();

    outputData.resize(outputTensorInfo.GetNumElements());
    CopyDataFromITensorHandle(&outputData[0], outputHandle.get());
    inputTensorInfo = outputTensorInfo;
}

//
// Permute the input tensors so we can do a supported concatenation.
// Also treat lower than 3d tensors as 3d by adding dummy 1 dimensions
// at the front. Finally this function tells what the output shape
// of the permuted concatenated tensor is going to be.
//
template<typename T> void PermuteInputsForConcat(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    std::vector<TensorInfo> & inputTensorInfos,
    std::vector<T *> & inputData,
    std::vector<std::vector<T>> & inputDataStorage,
    PermutationVector & permuteVector,
    unsigned int & concatDim,
    TensorInfo & outputTensorInfo)
{
    IgnoreUnused(memoryManager);
    ARMNN_ASSERT_MSG(inputTensorInfos.size() > 1,
        "Expecting more than one tensor to be concatenated here");

    unsigned int numDims = 0;
    unsigned int nthInput = 0;
    const PermutationVector identity({0, 1, 2});

    std::pair<PermutationVector, PermutationVector> permutations =
        std::make_pair(identity, identity);

    inputDataStorage.resize(inputData.size());

    for (auto && tensorInfo : inputTensorInfos)
    {
        if (numDims == 0)
        {
            numDims = tensorInfo.GetShape().GetNumDimensions();
            Generate3dPermuteVectorForConcat(numDims, concatDim, permutations);

            // Store the reverse permutation.
            permuteVector = permutations.second;
            ARMNN_ASSERT_MSG(!permuteVector.IsEqual(identity),
                "Test logic error, we don't need permutation, so we shouldn't arrive here");
        }
        else
        {
            ARMNN_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(),
                "All inputs must have the same number of dimensions");
        }

        TensorInfo newTensorInfo = tensorInfo;
        newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape()));

        PermuteTensorData<T>(workloadFactory,
                             memoryManager,
                             tensorHandleFactory,
                             permutations.first,
                             newTensorInfo,
                             inputData[nthInput],
                             inputDataStorage[nthInput]);

        inputData[nthInput] = inputDataStorage[nthInput].data();
        inputTensorInfos[nthInput] = newTensorInfo;

        ++nthInput;
    }

    outputTensorInfo.SetShape(
        armnnUtils::Permuted(
            ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()),
            permutations.first));
}
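// Continuing the { 2, 3 } axis-1 example: each input is expanded to { 1, 2, 3 } and
// permuted with the forward mapping { 1, 2, 0 } (Permuted moves source dimension i to
// destination mappings[i]), giving { 3, 1, 2 } tensors that the concat below joins
// along axis 0 into { 6, 1, 2 }. The stored reverse mapping undoes this afterwards.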
//
// This is the counterpart of PermuteInputsForConcat(...). It permutes back
// the output of the concatenation so we can check it against an expected
// output.
//
template<typename T> void PermuteOutputForConcat(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const TensorInfo & tensorInfo,
    const PermutationVector & permuteVector,
    std::unique_ptr<ITensorHandle> && inputDataHandle,
    T * data)
{
    ARMNN_ASSERT_MSG(data != nullptr, "data must not be null");
    if (data == nullptr)
    {
        // Nullptr is an error in the test. By returning without doing the permutation
        // I expect the caller to fail the test. It still makes sense to report this as
        // an assert for Debug builds.
        return;
    }

    TensorInfo resultTensorInfo = tensorInfo;
    std::vector<T> inputData(tensorInfo.GetNumElements());
    std::vector<T> outputData;

    CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get());

    PermuteTensorData<T>(workloadFactory,
                         memoryManager,
                         tensorHandleFactory,
                         permuteVector,
                         resultTensorInfo,
                         &inputData[0],
                         outputData);

    ::memcpy(data, &outputData[0], sizeof(T) * outputData.size());
}

template<typename T> void Concatenate(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    std::initializer_list<TensorInfo> inputTensorInfosOrig,
    std::initializer_list<T *> inputsOrig,
    const TensorInfo& outputTensorInfoOrig,
    T * output,
    unsigned int concatDim,
    bool useSubtensor)
{
    ARMNN_ASSERT_MSG(output != nullptr, "output must not be null");
    if (output == nullptr)
    {
        // Nullptr is an error in the test. By returning without doing the concatenation
        // I expect the caller to fail the test. It still makes sense to report this as
        // an assert for Debug builds.
        return;
    }

    // Saves a copy of the parameters which we might need to change.
    std::vector<TensorInfo> inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end());
    std::vector<T *> inputs = inputsOrig;
    TensorInfo outputTensorInfo = outputTensorInfoOrig;

    PermutationVector permuteVector{0, 1, 2};

    // Holds and automatically releases memory for the reshaped input data.
    std::vector<std::vector<T>> tmpInputDataStorage;

    const size_t inputCount = inputTensorInfos.size();

    bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim);

    if (needPermuteForConcat)
    {
        //
        // We need to permute the inputs, because concatenation along
        // the requested axis is not supported.
        //
        PermuteInputsForConcat<T>(workloadFactory,
                                  memoryManager,
                                  tensorHandleFactory,
                                  inputTensorInfos,
                                  inputs,
                                  tmpInputDataStorage,
                                  permuteVector,
                                  concatDim,
                                  outputTensorInfo);
    }

    WorkloadInfo workloadInfo;

    std::vector<std::unique_ptr<ITensorHandle>> inputHandles;
    inputHandles.reserve(inputCount);

    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    ConcatQueueDescriptor queueDescriptor;
    OriginsDescriptor viewsDescriptor = CreateDescriptorForConcat(inputTensorInfos, concatDim);
    queueDescriptor.m_Parameters = viewsDescriptor;
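    // A view origin is the offset of one input inside the concatenated output. For
    // example (values derived from CreateDescriptorForConcatenation), joining
    // { 2, 3 } + { 3, 3 } + { 1, 3 } along axis 0 yields origins { 0, 0 }, { 2, 0 }
    // and { 5, 0 } in the { 6, 3 } output.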
    if (useSubtensor)
    {
        queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews());
        for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i)
        {
            queueDescriptor.m_ViewOrigins.emplace_back(
                std::vector<unsigned int>(viewsDescriptor.GetViewOrigin(i),
                                          viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions()));
        }

        outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

        const bool subTensorsSupported = workloadFactory.SupportsSubTensors();
        for (unsigned int i = 0; i < inputCount; ++i)
        {
            const TensorInfo& inputTensorInfo = inputTensorInfos[i];
            std::unique_ptr<ITensorHandle> inputHandle =
                subTensorsSupported ?
                    tensorHandleFactory.CreateSubTensorHandle(*outputHandle,
                                                              inputTensorInfo.GetShape(),
                                                              queueDescriptor.m_ViewOrigins[i].m_Origin.data()) :
                    tensorHandleFactory.CreateTensorHandle(inputTensorInfo);

            inputHandles.emplace_back(std::move(inputHandle));
        }
    }
    else
    {
        for (unsigned int i = 0; i < inputCount; ++i)
        {
            std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfos[i]);
            inputHandles.emplace_back(std::move(inputHandle));
        }
    }

    for (unsigned int i = 0; i < inputCount; ++i)
    {
        AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get());
    }

    AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());

    std::unique_ptr<IWorkload> workload
        = workloadFactory.CreateWorkload(LayerType::Concat, queueDescriptor, workloadInfo);

    for (auto& inputHandle : inputHandles)
    {
        inputHandle->Allocate();
    }

    outputHandle->Allocate();

    unsigned int nextInputId = 0;
    for (auto& inputHandle : inputHandles)
    {
        CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]);
        ++nextInputId;
    }

    workload->PostAllocationConfigure();
    workload->Execute();

    if (needPermuteForConcat)
    {
        PermuteOutputForConcat<T>(workloadFactory,
                                  memoryManager,
                                  tensorHandleFactory,
                                  outputTensorInfo,
                                  permuteVector,
                                  std::move(outputHandle),
                                  output);
    }
    else
    {
        CopyDataFromITensorHandle(output, outputHandle.get());
    }
}

//
// Implementation templates
//
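// Note on the rank-1 and rank-2 implementations that follow: NeedPermuteForConcat is
// true for every rank below 3, so these tests always exercise the
// Permute -> Concat -> Permute path rather than a direct concatenation. For instance,
// Concat1dTestImpl joins three { 3 } tensors into { 9 } via expanded { 1, 1, 3 } inputs.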
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 1> Concat1dTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo inputTensorInfo({ 3 }, ArmnnType, qScale, qOffset);

    auto input0 = QuantizedVector<T>({ 1.0f, 2.0f, 3.0f }, qScale, qOffset);
    auto input1 = QuantizedVector<T>({ 4.0f, 5.0f, 6.0f }, qScale, qOffset);
    auto input2 = QuantizedVector<T>({ 7.0f, 8.0f, 9.0f }, qScale, qOffset);

    TensorInfo outputTensorInfo({ 9 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 1> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo, output.data(), 0, true);

    result.m_ActualData = output;
    result.m_ExpectedData = QuantizedVector<T>(
        { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 2> Concat2dTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const TensorInfo& outputTensorInfo,
    unsigned int dimension,
    const float qScale,
    const int32_t qOffset)
{
    TensorInfo inputTensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);

    auto input0 = QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,
            // Batch 1
            10.0f, 11.0f, 12.0f,
        },
        qScale, qOffset);

    auto input1 = QuantizedVector<T>(
        {
            // Batch 0
            4.0f, 5.0f, 6.0f,
            // Batch 1
            13.0f, 14.0f, 15.0f,
        },
        qScale, qOffset);

    auto input2 = QuantizedVector<T>(
        {
            // Batch 0
            7.0f, 8.0f, 9.0f,
            // Batch 1
            16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset);

    LayerTestResult<T, 2> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo, output.data(), dimension, true);

    result.m_ActualData = output;
    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 2> Concat2dDim0TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 2> result = Concat2dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 0, qScale, qOffset);

    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,
            // Batch 1
            10.0f, 11.0f, 12.0f,
            // Batch 2
            4.0f, 5.0f, 6.0f,
            // Batch 3
            13.0f, 14.0f, 15.0f,
            // Batch 4
            7.0f, 8.0f, 9.0f,
            // Batch 5
            16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 2> Concat2dDim1TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 2> result = Concat2dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 1, qScale, qOffset);

    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
            // Batch 1
            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f
        },
        qScale, qOffset);

    return result;
}
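// The two implementations below concatenate inputs whose shapes differ along the
// concat axis, the only axis on which inputs may legally differ:
// { 2, 3 } + { 3, 3 } + { 1, 3 } -> { 6, 3 } on axis 0, and
// { 2, 3 } + { 2, 5 } + { 2, 1 } -> { 2, 9 } on axis 1.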
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 2> Concat2dDim0DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,
            // Batch 1
            10.0f, 11.0f, 12.0f,
        },
        qScale, qOffset);

    TensorInfo input1TensorInfo({ 3, 3 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            // Batch 0
            4.0f, 5.0f, 6.0f,
            // Batch 1
            13.0f, 14.0f, 15.0f,
            // Batch 2
            7.0f, 8.0f, 9.0f,
        },
        qScale, qOffset);

    TensorInfo input2TensorInfo({ 1, 3 }, ArmnnType, qScale, qOffset);
    auto input2 = QuantizedVector<T>(
        {
            // Batch 0
            16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 2> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo, output.data(), 0, true);

    result.m_ActualData = output;
    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,
            // Batch 1
            10.0f, 11.0f, 12.0f,
            // Batch 2
            4.0f, 5.0f, 6.0f,
            // Batch 3
            13.0f, 14.0f, 15.0f,
            // Batch 4
            7.0f, 8.0f, 9.0f,
            // Batch 5
            16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 2> Concat2dDim1DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,
            // Batch 1
            10.0f, 11.0f, 12.0f,
        },
        qScale, qOffset);

    TensorInfo input1TensorInfo({ 2, 5 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            // Batch 0
            4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
            // Batch 1
            13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
        },
        qScale, qOffset);

    TensorInfo input2TensorInfo({ 2, 1 }, ArmnnType, qScale, qOffset);
    auto input2 = QuantizedVector<T>(
        {
            // Batch 0
            9.0f,
            // Batch 1
            18.0f
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 2> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo, output.data(), 1, true);

    result.m_ActualData = output;
    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
            // Batch 1
            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const TensorInfo& outputTensorInfo,
    unsigned int dimension,
    bool useSubtensor,
    float qScale,
    int32_t qOffset)
{
    TensorInfo inputTensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);

    auto input0 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,
            // Batch 0, Channel 1
            3.0f, 4.0f,
            // Batch 0, Channel 2
            5.0f, 6.0f,
            // Batch 1, Channel 0
            19.0f, 20.0f,
            // Batch 1, Channel 1
            21.0f, 22.0f,
            // Batch 1, Channel 2
            23.0f, 24.0f
        },
        qScale, qOffset);

    auto input1 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            7.0f, 8.0f,
            // Batch 0, Channel 1
            9.0f, 10.0f,
            // Batch 0, Channel 2
            11.0f, 12.0f,
            // Batch 1, Channel 0
            25.0f, 26.0f,
            // Batch 1, Channel 1
            27.0f, 28.0f,
            // Batch 1, Channel 2
            29.0f, 30.0f
        },
        qScale, qOffset);

    auto input2 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            13.0f, 14.0f,
            // Batch 0, Channel 1
            15.0f, 16.0f,
            // Batch 0, Channel 2
            17.0f, 18.0f,
            // Batch 1, Channel 0
            31.0f, 32.0f,
            // Batch 1, Channel 1
            33.0f, 34.0f,
            // Batch 1, Channel 2
            35.0f, 36.0f
        },
        qScale, qOffset);

    LayerTestResult<T, 3> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo, output.data(), dimension, useSubtensor);

    result.m_ActualData = output;
    return result;
}
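// For rank-3 inputs, NeedPermuteForConcat is true only for axis 1 (the middle axis):
// axis 0 and axis 2 (the innermost) concatenate directly. This is presumably also
// why only the axis-2 variants below expose a useSubtensor parameter.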
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim0TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 3> result = Concat3dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 0, true, qScale, qOffset);

    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,
            // Batch 0, Channel 1
            3.0f, 4.0f,
            // Batch 0, Channel 2
            5.0f, 6.0f,
            // Batch 1, Channel 0
            19.0f, 20.0f,
            // Batch 1, Channel 1
            21.0f, 22.0f,
            // Batch 1, Channel 2
            23.0f, 24.0f,
            // Batch 2, Channel 0
            7.0f, 8.0f,
            // Batch 2, Channel 1
            9.0f, 10.0f,
            // Batch 2, Channel 2
            11.0f, 12.0f,
            // Batch 3, Channel 0
            25.0f, 26.0f,
            // Batch 3, Channel 1
            27.0f, 28.0f,
            // Batch 3, Channel 2
            29.0f, 30.0f,
            // Batch 4, Channel 0
            13.0f, 14.0f,
            // Batch 4, Channel 1
            15.0f, 16.0f,
            // Batch 4, Channel 2
            17.0f, 18.0f,
            // Batch 5, Channel 0
            31.0f, 32.0f,
            // Batch 5, Channel 1
            33.0f, 34.0f,
            // Batch 5, Channel 2
            35.0f, 36.0f
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim1TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 2, 9, 2 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 3> result = Concat3dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 1, true, qScale, qOffset);

    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,
            // Batch 0, Channel 1
            3.0f, 4.0f,
            // Batch 0, Channel 2
            5.0f, 6.0f,
            // Batch 0, Channel 3
            7.0f, 8.0f,
            // Batch 0, Channel 4
            9.0f, 10.0f,
            // Batch 0, Channel 5
            11.0f, 12.0f,
            // Batch 0, Channel 6
            13.0f, 14.0f,
            // Batch 0, Channel 7
            15.0f, 16.0f,
            // Batch 0, Channel 8
            17.0f, 18.0f,
            // Batch 1, Channel 0
            19.0f, 20.0f,
            // Batch 1, Channel 1
            21.0f, 22.0f,
            // Batch 1, Channel 2
            23.0f, 24.0f,
            // Batch 1, Channel 3
            25.0f, 26.0f,
            // Batch 1, Channel 4
            27.0f, 28.0f,
            // Batch 1, Channel 5
            29.0f, 30.0f,
            // Batch 1, Channel 6
            31.0f, 32.0f,
            // Batch 1, Channel 7
            33.0f, 34.0f,
            // Batch 1, Channel 8
            35.0f, 36.0f
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim2TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 3> result = Concat3dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 2, useSubtensor, qScale, qOffset);

    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f,
            // Batch 0, Channel 1
            3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f,
            // Batch 0, Channel 2
            5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f,
            // Batch 1, Channel 0
            19.0f, 20.0f, 25.0f, 26.0f, 31.0f, 32.0f,
            // Batch 1, Channel 1
            21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f,
            // Batch 1, Channel 2
            23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f,
        },
        qScale, qOffset);

    return result;
}
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim0DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,
            // Batch 0, Channel 1
            3.0f, 4.0f,
            // Batch 0, Channel 2
            5.0f, 6.0f,
            // Batch 1, Channel 0
            19.0f, 20.0f,
            // Batch 1, Channel 1
            21.0f, 22.0f,
            // Batch 1, Channel 2
            23.0f, 24.0f
        },
        qScale, qOffset);

    TensorInfo input1TensorInfo({ 1, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            7.0f, 8.0f,
            // Batch 0, Channel 1
            9.0f, 10.0f,
            // Batch 0, Channel 2
            11.0f, 12.0f,
        },
        qScale, qOffset);

    TensorInfo input2TensorInfo({ 3, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input2 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            25.0f, 26.0f,
            // Batch 0, Channel 1
            27.0f, 28.0f,
            // Batch 0, Channel 2
            29.0f, 30.0f,
            // Batch 1, Channel 0
            13.0f, 14.0f,
            // Batch 1, Channel 1
            15.0f, 16.0f,
            // Batch 1, Channel 2
            17.0f, 18.0f,
            // Batch 2, Channel 0
            31.0f, 32.0f,
            // Batch 2, Channel 1
            33.0f, 34.0f,
            // Batch 2, Channel 2
            35.0f, 36.0f
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 3> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo, output.data(), 0, true);

    result.m_ActualData = output;
    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,
            // Batch 0, Channel 1
            3.0f, 4.0f,
            // Batch 0, Channel 2
            5.0f, 6.0f,
            // Batch 1, Channel 0
            19.0f, 20.0f,
            // Batch 1, Channel 1
            21.0f, 22.0f,
            // Batch 1, Channel 2
            23.0f, 24.0f,
            // Batch 2, Channel 0
            7.0f, 8.0f,
            // Batch 2, Channel 1
            9.0f, 10.0f,
            // Batch 2, Channel 2
            11.0f, 12.0f,
            // Batch 3, Channel 0
            25.0f, 26.0f,
            // Batch 3, Channel 1
            27.0f, 28.0f,
            // Batch 3, Channel 2
            29.0f, 30.0f,
            // Batch 4, Channel 0
            13.0f, 14.0f,
            // Batch 4, Channel 1
            15.0f, 16.0f,
            // Batch 4, Channel 2
            17.0f, 18.0f,
            // Batch 5, Channel 0
            31.0f, 32.0f,
            // Batch 5, Channel 1
            33.0f, 34.0f,
            // Batch 5, Channel 2
            35.0f, 36.0f
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim1DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,
            // Batch 0, Channel 1
            3.0f, 4.0f,
            // Batch 0, Channel 2
            5.0f, 6.0f,
            // Batch 1, Channel 0
            19.0f, 20.0f,
            // Batch 1, Channel 1
            21.0f, 22.0f,
            // Batch 1, Channel 2
            23.0f, 24.0f
        },
        qScale, qOffset);

    TensorInfo input1TensorInfo({ 2, 4, 2 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            7.0f, 8.0f,
            // Batch 0, Channel 1
            9.0f, 10.0f,
            // Batch 0, Channel 2
            11.0f, 12.0f,
            // Batch 0, Channel 3
            25.0f, 26.0f,
            // Batch 1, Channel 0
            27.0f, 28.0f,
            // Batch 1, Channel 1
            29.0f, 30.0f,
            // Batch 1, Channel 2
            13.0f, 14.0f,
            // Batch 1, Channel 3
            15.0f, 16.0f,
        },
        qScale, qOffset);

    TensorInfo input2TensorInfo({ 2, 1, 2 }, ArmnnType, qScale, qOffset);
    auto input2 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            17.0f, 18.0f,
            // Batch 1, Channel 0
            31.0f, 32.0f,
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 2, 8, 2 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 3> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo, output.data(), 1, true);

    result.m_ActualData = output;
    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,
            // Batch 0, Channel 1
            3.0f, 4.0f,
            // Batch 0, Channel 2
            5.0f, 6.0f,
            // Batch 0, Channel 3
            7.0f, 8.0f,
            // Batch 0, Channel 4
            9.0f, 10.0f,
            // Batch 0, Channel 5
            11.0f, 12.0f,
            // Batch 0, Channel 6
            25.0f, 26.0f,
            // Batch 0, Channel 7
            17.0f, 18.0f,
            // Batch 1, Channel 0
            19.0f, 20.0f,
            // Batch 1, Channel 1
            21.0f, 22.0f,
            // Batch 1, Channel 2
            23.0f, 24.0f,
            // Batch 1, Channel 3
            27.0f, 28.0f,
            // Batch 1, Channel 4
            29.0f, 30.0f,
            // Batch 1, Channel 5
            13.0f, 14.0f,
            // Batch 1, Channel 6
            15.0f, 16.0f,
            // Batch 1, Channel 7
            31.0f, 32.0f,
        },
        qScale, qOffset);

    return result;
}
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim2DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,
            // Batch 0, Channel 1
            3.0f, 4.0f,
            // Batch 0, Channel 2
            5.0f, 6.0f,
            // Batch 1, Channel 0
            19.0f, 20.0f,
            // Batch 1, Channel 1
            21.0f, 22.0f,
            // Batch 1, Channel 2
            23.0f, 24.0f
        },
        qScale, qOffset);

    TensorInfo input1TensorInfo({ 2, 3, 1 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            7.0f,
            // Batch 0, Channel 1
            9.0f,
            // Batch 0, Channel 2
            11.0f,
            // Batch 1, Channel 0
            25.0f,
            // Batch 1, Channel 1
            27.0f,
            // Batch 1, Channel 2
            29.0f
        },
        qScale, qOffset);

    TensorInfo input2TensorInfo({ 2, 3, 3 }, ArmnnType, qScale, qOffset);
    auto input2 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            13.0f, 14.0f, 50.0f,
            // Batch 0, Channel 1
            15.0f, 16.0f, 51.0f,
            // Batch 0, Channel 2
            17.0f, 18.0f, 52.0f,
            // Batch 1, Channel 0
            31.0f, 32.0f, 53.0f,
            // Batch 1, Channel 1
            33.0f, 34.0f, 54.0f,
            // Batch 1, Channel 2
            35.0f, 36.0f, 55.0f,
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 3> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo, output.data(), 2, useSubtensor);

    result.m_ActualData = output;
    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f,
            // Batch 0, Channel 1
            3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f,
            // Batch 0, Channel 2
            5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f,
            // Batch 1, Channel 0
            19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f,
            // Batch 1, Channel 1
            21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f,
            // Batch 1, Channel 2
            23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f,
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const TensorInfo& outputTensorInfo,
    unsigned int dimension,
    bool useSubtensor,
    float qScale,
    int32_t qOffset)
{
    TensorInfo inputTensorInfo({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);

    auto input0 = QuantizedVector<T>(
        {
            1.0f, 2.0f, 3.0f, 4.0f,
            5.0f, 6.0f, 7.0f, 8.0f,
            9.0f, 10.0f, 11.0f, 12.0f
        },
        qScale, qOffset);

    auto input1 = QuantizedVector<T>(
        {
            11.0f, 12.0f, 13.0f, 14.0f,
            15.0f, 16.0f, 17.0f, 18.0f,
            19.0f, 20.0f, 21.0f, 22.0f
        },
        qScale, qOffset);

    auto input2 = QuantizedVector<T>(
        {
            21.0f, 22.0f, 23.0f, 24.0f,
            25.0f, 26.0f, 27.0f, 28.0f,
            29.0f, 30.0f, 31.0f, 32.0f
        },
        qScale, qOffset);

    LayerTestResult<T, 4> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo, output.data(), dimension, useSubtensor);

    result.m_ActualData = output;
    return result;
}
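// Note on the rank-4 tests below: NeedPermuteForConcat is always false for rank 4,
// so every axis (N, C, H and W in NCHW terms) is concatenated directly; the axis-3
// variants still honour the useSubtensor switch.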
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDim0TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 0, true, qScale, qOffset);

    result.m_ExpectedData = QuantizedVector<T>(
        {
            1.0f, 2.0f, 3.0f, 4.0f,
            5.0f, 6.0f, 7.0f, 8.0f,
            9.0f, 10.0f, 11.0f, 12.0f,

            11.0f, 12.0f, 13.0f, 14.0f,
            15.0f, 16.0f, 17.0f, 18.0f,
            19.0f, 20.0f, 21.0f, 22.0f,

            21.0f, 22.0f, 23.0f, 24.0f,
            25.0f, 26.0f, 27.0f, 28.0f,
            29.0f, 30.0f, 31.0f, 32.0f
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDim1TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 1, 9, 2, 2 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 1, true, qScale, qOffset);

    result.m_ExpectedData = QuantizedVector<T>(
        {
            1.0f, 2.0f, 3.0f, 4.0f,
            5.0f, 6.0f, 7.0f, 8.0f,
            9.0f, 10.0f, 11.0f, 12.0f,

            11.0f, 12.0f, 13.0f, 14.0f,
            15.0f, 16.0f, 17.0f, 18.0f,
            19.0f, 20.0f, 21.0f, 22.0f,

            21.0f, 22.0f, 23.0f, 24.0f,
            25.0f, 26.0f, 27.0f, 28.0f,
            29.0f, 30.0f, 31.0f, 32.0f
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDim2TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 1, 3, 6, 2 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 2, true, qScale, qOffset);

    result.m_ExpectedData = QuantizedVector<T>(
        {
            1.0f, 2.0f, 3.0f, 4.0f,
            11.0f, 12.0f, 13.0f, 14.0f,
            21.0f, 22.0f, 23.0f, 24.0f,

            5.0f, 6.0f, 7.0f, 8.0f,
            15.0f, 16.0f, 17.0f, 18.0f,
            25.0f, 26.0f, 27.0f, 28.0f,

            9.0f, 10.0f, 11.0f, 12.0f,
            19.0f, 20.0f, 21.0f, 22.0f,
            29.0f, 30.0f, 31.0f, 32.0f
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDim3TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool useSubtensor)
{
    TensorInfo outputTensorInfo({ 1, 3, 2, 6 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 3, useSubtensor, qScale, qOffset);

    result.m_ExpectedData = QuantizedVector<T>(
        {
            1.0f, 2.0f, 11.0f, 12.0f, 21.0f, 22.0f,
            3.0f, 4.0f, 13.0f, 14.0f, 23.0f, 24.0f,
            5.0f, 6.0f, 15.0f, 16.0f, 25.0f, 26.0f,
            7.0f, 8.0f, 17.0f, 18.0f, 27.0f, 28.0f,
            9.0f, 10.0f, 19.0f, 20.0f, 29.0f, 30.0f,
            11.0f, 12.0f, 21.0f, 22.0f, 31.0f, 32.0f
        },
        qScale, qOffset);

    return result;
}
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDiffShapeDim0TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    constexpr unsigned int dimension = 0u;

    TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            1.0f, 2.0f, 3.0f, 4.0f,
            5.0f, 6.0f, 7.0f, 8.0f,
            9.0f, 10.0f, 11.0f, 12.0f
        },
        qScale, qOffset);

    TensorInfo inputTensorInfo1({ 2, 3, 2, 2 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            11.0f, 12.0f, 13.0f, 14.0f,
            15.0f, 16.0f, 17.0f, 18.0f,
            19.0f, 20.0f, 21.0f, 22.0f,

            21.0f, 22.0f, 23.0f, 24.0f,
            25.0f, 26.0f, 27.0f, 28.0f,
            29.0f, 30.0f, 31.0f, 32.0f
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 4> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { inputTensorInfo0, inputTensorInfo1 },
                   { input0.data(), input1.data() },
                   outputTensorInfo, output.data(), dimension, true);

    result.m_ActualData = output;
    result.m_ExpectedData = QuantizedVector<T>(
        {
            1.0f, 2.0f, 3.0f, 4.0f,
            5.0f, 6.0f, 7.0f, 8.0f,
            9.0f, 10.0f, 11.0f, 12.0f,

            11.0f, 12.0f, 13.0f, 14.0f,
            15.0f, 16.0f, 17.0f, 18.0f,
            19.0f, 20.0f, 21.0f, 22.0f,

            21.0f, 22.0f, 23.0f, 24.0f,
            25.0f, 26.0f, 27.0f, 28.0f,
            29.0f, 30.0f, 31.0f, 32.0f
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDiffShapeDim1TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    constexpr unsigned int dimension = 1u;

    TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            1.0f, 2.0f, 3.0f, 4.0f,
            5.0f, 6.0f, 7.0f, 8.0f,
            9.0f, 10.0f, 11.0f, 12.0f
        },
        qScale, qOffset);

    TensorInfo inputTensorInfo1({ 1, 2, 2, 2 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            11.0f, 12.0f, 13.0f, 14.0f,
            15.0f, 16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 1, 5, 2, 2 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 4> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { inputTensorInfo0, inputTensorInfo1 },
                   { input0.data(), input1.data() },
                   outputTensorInfo, output.data(), dimension, true);

    result.m_ActualData = output;
    result.m_ExpectedData = QuantizedVector<T>(
        {
            1.0f, 2.0f, 3.0f, 4.0f,
            5.0f, 6.0f, 7.0f, 8.0f,
            9.0f, 10.0f, 11.0f, 12.0f,

            11.0f, 12.0f, 13.0f, 14.0f,
            15.0f, 16.0f, 17.0f, 18.0f
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDiffShapeDim2TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    constexpr unsigned int dimension = 2u;

    TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            1.0f, 2.0f, 3.0f, 4.0f,
            5.0f, 6.0f, 7.0f, 8.0f,
            9.0f, 10.0f, 11.0f, 12.0f
        },
        qScale, qOffset);

    TensorInfo inputTensorInfo1({ 1, 3, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f,
            17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f,
            23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 1, 3, 5, 2 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 4> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { inputTensorInfo0, inputTensorInfo1 },
                   { input0.data(), input1.data() },
                   outputTensorInfo, output.data(), dimension, true);

    result.m_ActualData = output;
    result.m_ExpectedData = QuantizedVector<T>(
        {
            1.0f, 2.0f, 3.0f, 4.0f,
            11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f,

            5.0f, 6.0f, 7.0f, 8.0f,
            17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f,

            9.0f, 10.0f, 11.0f, 12.0f,
            23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f
        },
        qScale, qOffset);

    return result;
}
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDiffShapeDim3TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool useSubtensor)
{
    constexpr unsigned int dimension = 3u;

    TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            1.0f, 2.0f, 3.0f, 4.0f,
            5.0f, 6.0f, 7.0f, 8.0f,
            9.0f, 10.0f, 11.0f, 12.0f
        },
        qScale, qOffset);

    TensorInfo inputTensorInfo1({ 1, 3, 2, 3 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f,
            17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f,
            23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 1, 3, 2, 5 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 4> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { inputTensorInfo0, inputTensorInfo1 },
                   { input0.data(), input1.data() },
                   outputTensorInfo, output.data(), dimension, useSubtensor);

    result.m_ActualData = output;
    result.m_ExpectedData = QuantizedVector<T>(
        {
            1.0f, 2.0f, 11.0f, 12.0f, 13.0f,
            3.0f, 4.0f, 14.0f, 15.0f, 16.0f,
            5.0f, 6.0f, 17.0f, 18.0f, 19.0f,
            7.0f, 8.0f, 20.0f, 21.0f, 22.0f,
            9.0f, 10.0f, 23.0f, 24.0f, 25.0f,
            11.0f, 12.0f, 26.0f, 27.0f, 28.0f
        },
        qScale, qOffset);

    return result;
}

template<DataType ArmnnType, typename T>
LayerTestResult<T, 3> ConcatDifferentInputOutputQParamTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    IgnoreUnused(memoryManager);

    // Defines the tensor descriptors.
    TensorInfo outputTensorInfo({ 3, 6, 3 }, ArmnnType);
    TensorInfo inputTensorInfo1({ 3, 6, 2 }, ArmnnType);
    TensorInfo inputTensorInfo2({ 3, 6, 1 }, ArmnnType);

    std::vector<TensorShape> inputTensorShapes({inputTensorInfo1.GetShape(), inputTensorInfo2.GetShape()});

    // Quantized input1 tensor.
    const float inputScale1 = 0.5f;
    const int32_t inputOffset1 = 5;

    std::vector<T> input1 =
    {
        1, 2, 3, 4, 5, 6,
        7, 8, 9, 10, 11, 12,
        13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24,
        25, 26, 27, 28, 29, 30,
        31, 32, 33, 34, 35, 36
    };

    // Quantized input2 tensor.
    const float inputScale2 = 0.2f;
    const int32_t inputOffset2 = 10;

    std::vector<T> input2 =
    {
        37, 38, 39, 40, 41, 42,
        43, 44, 45, 46, 47, 48,
        49, 50, 51, 52, 53, 54
    };

    // Quantized output tensor.
    const float outputScale = 0.1f;
    const int32_t outputOffset = 20;

    std::vector<T> actualOutput(outputTensorInfo.GetNumElements());

    std::vector<T> expectedOutput =
    {
        0,   5,   74,
        10,  15,  76,
        20,  25,  78,
        30,  35,  80,
        40,  45,  82,
        50,  55,  84,
        60,  65,  86,
        70,  75,  88,
        80,  85,  90,
        90,  95,  92,
        100, 105, 94,
        110, 115, 96,
        120, 125, 98,
        130, 135, 100,
        140, 145, 102,
        150, 155, 104,
        160, 165, 106,
        170, 175, 108
    };

    outputTensorInfo.SetQuantizationScale(outputScale);
    outputTensorInfo.SetQuantizationOffset(outputOffset);
    inputTensorInfo1.SetQuantizationScale(inputScale1);
    inputTensorInfo1.SetQuantizationOffset(inputOffset1);
    inputTensorInfo2.SetQuantizationScale(inputScale2);
    inputTensorInfo2.SetQuantizationOffset(inputOffset2);
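    // Spot-check of the expected requantization (hand-computed from the parameters
    // above): input1's leading 1, 2 dequantize to (1 - 5) * 0.5 = -2.0 and -1.5, which
    // requantize to -2.0 / 0.1 + 20 = 0 and 5; input2's leading 37 dequantizes to
    // (37 - 10) * 0.2 = 5.4 and requantizes to 74 - matching the first output row
    // "0, 5, 74" in expectedOutput.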
    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
    ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = { 0, 0, 2 }; //Extent of the window is defined by size of input[1].
    ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);

    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    bool subTensorsSupported = useSubtensor && workloadFactory.SupportsSubTensors();

    std::unique_ptr<ITensorHandle> inputHandle1 =
        subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<ITensorHandle> inputHandle2 =
        subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);

    ConcatQueueDescriptor data;
    OriginsDescriptor desc = CreateDescriptorForConcatenation(
        inputTensorShapes.begin(), inputTensorShapes.end(), 2);
    data.m_Parameters = desc;

    WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(LayerType::Concat, data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), input1.data());
    CopyDataToITensorHandle(inputHandle2.get(), input2.data());

    workload->PostAllocationConfigure();
    workload->Execute();

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<T, 3>(actualOutput,
                                 expectedOutput,
                                 outputHandle->GetShape(),
                                 outputTensorInfo.GetShape());
}

//
// Explicit template specializations
//

template LayerTestResult<ResolveType<DataType::QAsymmU8>, 3>
ConcatDifferentInputOutputQParamTest<DataType::QAsymmU8>(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor);

template LayerTestResult<ResolveType<DataType::QSymmS16>, 3>
ConcatDifferentInputOutputQParamTest<DataType::QSymmS16>(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor);

//
// Implementation functions
//

LayerTestResult<float, 3> ConcatTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    IgnoreUnused(memoryManager);

    unsigned int outputWidth = 3;
    unsigned int outputHeight = 6;
    unsigned int outputChannels = 3;

    unsigned int inputWidth1 = 3;
    unsigned int inputHeight1 = 6;
    unsigned int inputChannels1 = 2;

    unsigned int inputWidth2 = 3;
    unsigned int inputHeight2 = 6;
    unsigned int inputChannels2 = 1;

    // Define the tensor descriptors.
    TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, DataType::Float32);
    TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, DataType::Float32);
    TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, DataType::Float32);

    std::vector<float> actualOutput(outputTensorInfo.GetNumElements());

    std::vector<float> expectedOutput =
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f,
        7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f,
        13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
        19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f,
        25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f,
        31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f,
        37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f,
        43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f,
        49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f
    };

    std::vector<float> input1 =
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f,
        7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f,
        13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
        19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f,
        25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f,
        31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f
    };

    std::vector<float> input2 =
    {
        37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f,
        43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f,
        49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f,
    };
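    // The two views place input1 (2 channels) at channel offset 0 and input2
    // (1 channel) at channel offset 2 of the { 3, 6, 3 } output, writing the inputs
    // back-to-back along the channel axis.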
    std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //Extent of the window is defined by size of input[0].
    ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = {2, 0, 0}; //Extent of the window is defined by size of input[1].
    ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);

    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    bool subTensorsSupported = workloadFactory.SupportsSubTensors();

    std::unique_ptr<ITensorHandle> inputHandle1 =
        subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<ITensorHandle> inputHandle2 =
        subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);

    ConcatQueueDescriptor data;
    WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(LayerType::Concat, data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), input1.data());
    CopyDataToITensorHandle(inputHandle2.get(), input2.data());

    workload->PostAllocationConfigure();
    workload->Execute();

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<float, 3>(actualOutput,
                                     expectedOutput,
                                     outputHandle->GetShape(),
                                     outputTensorInfo.GetShape());
}

LayerTestResult<float, 1> Concat1dTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat1dTestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 2> Concat2dDim0Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 2> Concat2dDim1Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim1TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 2> Concat2dDim0DiffInputDimsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim0DiffInputDimsTestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 2> Concat2dDim1DiffInputDimsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim1DiffInputDimsTestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 3> Concat3dDim0Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}
LayerTestResult<float, 3> Concat3dDim1Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim1TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 3> Concat3dDim2Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat3dDim2TestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.0f, 0);
}

LayerTestResult<float, 3> Concat3dDim0DiffInputDimsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim0DiffInputDimsTestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 3> Concat3dDim1DiffInputDimsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim1DiffInputDimsTestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 3> Concat3dDim2DiffInputDimsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat3dDim2DiffInputDimsTestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDim0Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDim1Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDim1TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDim2Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDim2TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDim3Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat4dDim3TestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, useSubtensor);
}

LayerTestResult<float, 4> Concat4dDiffShapeDim0Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDiffShapeDim0TestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDiffShapeDim1Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDiffShapeDim1TestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDiffShapeDim2Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDiffShapeDim2TestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}
LayerTestResult<float, 4> Concat4dDiffShapeDim3Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat4dDiffShapeDim3TestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, useSubtensor);
}

LayerTestResult<Half, 3> ConcatFloat16Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim1TestImpl<DataType::Float16>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<BFloat16, 3> ConcatBFloat16Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim1TestImpl<DataType::BFloat16>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<uint8_t, 3> ConcatUint8DifferentQParamsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    IgnoreUnused(memoryManager);

    unsigned int outputWidth = 3;
    unsigned int outputHeight = 6;
    unsigned int outputChannels = 3;

    unsigned int inputWidth1 = 3;
    unsigned int inputHeight1 = 6;
    unsigned int inputChannels1 = 2;

    unsigned int inputWidth2 = 3;
    unsigned int inputHeight2 = 6;
    unsigned int inputChannels2 = 1;

    // Defines the tensor descriptors.
    TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, DataType::QAsymmU8);
    TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, DataType::QAsymmU8);
    TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, DataType::QAsymmU8);

    // Quantized input1 tensor. Range [-3, 1]
    const float inputScale1 = 0.015686f;
    const int32_t inputOffset1 = 192;

    std::vector<uint8_t> input1 =
    {
        1, 2, 3, 4, 5, 6,
        7, 8, 9, 10, 11, 12,
        13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24,
        25, 26, 27, 28, 29, 30,
        31, 32, 33, 34, 35, 36
    };

    // Quantized input2 tensor. Range [-1, 4]
    const float inputScale2 = 0.019608f;
    const int32_t inputOffset2 = 50;

    std::vector<uint8_t> input2 =
    {
        37, 38, 39, 40, 41, 42,
        43, 44, 45, 46, 47, 48,
        49, 50, 51, 52, 53, 54
    };

    // Output has the same quantization parameters as input1,
    // so that only the requantization of input2 is required.
    const float outputScale = 0.015686f;
    const int32_t outputOffset = 192;

    std::vector<uint8_t> actualOutput(outputTensorInfo.GetNumElements());

    std::vector<uint8_t> expectedOutput =
    {
        1, 2, 3, 4, 5, 6,
        7, 8, 9, 10, 11, 12,
        13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24,
        25, 26, 27, 28, 29, 30,
        31, 32, 33, 34, 35, 36,

        176, 177, 178, 179, 181, 182,
        183, 184, 186, 187, 188, 189,
        191, 192, 193, 195, 196, 197
    };

    outputTensorInfo.SetQuantizationScale(outputScale);
    outputTensorInfo.SetQuantizationOffset(outputOffset);
    inputTensorInfo1.SetQuantizationScale(inputScale1);
    inputTensorInfo1.SetQuantizationOffset(inputOffset1);
    inputTensorInfo2.SetQuantizationScale(inputScale2);
    inputTensorInfo2.SetQuantizationOffset(inputOffset2);
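    // Spot-check (hand-computed): input1 passes through unchanged because input and
    // output share scale/offset, while input2's first element 37 requantizes as
    // (37 - 50) * 0.019608 = -0.2549 real -> -0.2549 / 0.015686 + 192 = 175.75 -> 176,
    // matching the start of the last channel in expectedOutput.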
    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
    ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
    ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);

    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    bool subTensorsSupported = workloadFactory.SupportsSubTensors();

    std::unique_ptr<ITensorHandle> inputHandle1 =
        subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<ITensorHandle> inputHandle2 =
        subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);

    ConcatQueueDescriptor data;
    WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(LayerType::Concat, data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), input1.data());
    CopyDataToITensorHandle(inputHandle2.get(), input2.data());

    workload->PostAllocationConfigure();
    workload->Execute();

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<uint8_t, 3>(actualOutput,
                                       expectedOutput,
                                       outputHandle->GetShape(),
                                       outputTensorInfo.GetShape());
}

LayerTestResult<uint8_t, 3> ConcatUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    IgnoreUnused(memoryManager);

    unsigned int outputWidth = 3;
    unsigned int outputHeight = 6;
    unsigned int outputChannels = 3;

    unsigned int inputWidth1 = 3;
    unsigned int inputHeight1 = 6;
    unsigned int inputChannels1 = 2;

    unsigned int inputWidth2 = 3;
    unsigned int inputHeight2 = 6;
    unsigned int inputChannels2 = 1;

    // Defines the tensor descriptors.
    TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, DataType::QAsymmU8);
    TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, DataType::QAsymmU8);
    TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, DataType::QAsymmU8);

    // Arbitrary scale and offsets. They don't really matter as the Concat operator doesn't dequantize/quantize them.
    const float scale = 0.13497836f;
    const int32_t offset = -7;

    outputTensorInfo.SetQuantizationScale(scale);
    outputTensorInfo.SetQuantizationOffset(offset);
    inputTensorInfo1.SetQuantizationScale(scale);
    inputTensorInfo1.SetQuantizationOffset(offset);
    inputTensorInfo2.SetQuantizationScale(scale);
    inputTensorInfo2.SetQuantizationOffset(offset);

    std::vector<uint8_t> actualOutput(outputTensorInfo.GetNumElements());

    std::vector<uint8_t> expectedOutput =
    {
        1, 2, 3, 4, 5, 6,
        7, 8, 9, 10, 11, 12,
        13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24,
        25, 26, 27, 28, 29, 30,
        31, 32, 33, 34, 35, 36,

        37, 38, 39, 40, 41, 42,
        43, 44, 45, 46, 47, 48,
        49, 50, 51, 52, 53, 54
    };

    std::vector<uint8_t> input1 =
    {
        1, 2, 3, 4, 5, 6,
        7, 8, 9, 10, 11, 12,
        13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24,
        25, 26, 27, 28, 29, 30,
        31, 32, 33, 34, 35, 36
    };

    std::vector<uint8_t> input2 =
    {
        37, 38, 39, 40, 41, 42,
        43, 44, 45, 46, 47, 48,
        49, 50, 51, 52, 53, 54
    };

    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
    ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
LayerTestResult<uint16_t, 3> ConcatUint16Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    IgnoreUnused(memoryManager);

    unsigned int outputWidth    = 3;
    unsigned int outputHeight   = 6;
    unsigned int outputChannels = 3;

    unsigned int inputWidth1    = 3;
    unsigned int inputHeight1   = 6;
    unsigned int inputChannels1 = 2;

    unsigned int inputWidth2    = 3;
    unsigned int inputHeight2   = 6;
    unsigned int inputChannels2 = 1;

    // Defines the tensor descriptors.
    TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, DataType::QSymmS16);
    TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, DataType::QSymmS16);
    TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, DataType::QSymmS16);

    // Arbitrary scale and offsets. They don't really matter as the Concat operator doesn't
    // dequantize or requantize the inputs.
    const float scale = 0.13497836f;
    const int32_t offset = -7;

    outputTensorInfo.SetQuantizationScale(scale);
    outputTensorInfo.SetQuantizationOffset(offset);
    inputTensorInfo1.SetQuantizationScale(scale);
    inputTensorInfo1.SetQuantizationOffset(offset);
    inputTensorInfo2.SetQuantizationScale(scale);
    inputTensorInfo2.SetQuantizationOffset(offset);

    std::vector<uint16_t> actualOutput(outputTensorInfo.GetNumElements());

    std::vector<uint16_t> expectedOutput =
    {
        // Channels 0 and 1 come from input1, channel 2 from input2.
        1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
        37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54
    };

    std::vector<uint16_t> input1 =
    {
        1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36
    };

    std::vector<uint16_t> input2 =
    {
        37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54
    };

    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; // Extent of the window is defined by size of input[0].
    ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; // Extent of the window is defined by size of input[1].
    ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);

    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    bool subTensorsSupported = workloadFactory.SupportsSubTensors();

    std::unique_ptr<ITensorHandle> inputHandle1 = subTensorsSupported ?
        tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
        tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<ITensorHandle> inputHandle2 = subTensorsSupported ?
        tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
        tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);

    ConcatQueueDescriptor data;
    WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(LayerType::Concat, data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), input1.data());
    CopyDataToITensorHandle(inputHandle2.get(), input2.data());

    workload->PostAllocationConfigure();
    workload->Execute();

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<uint16_t, 3>(actualOutput,
                                        expectedOutput,
                                        outputHandle->GetShape(),
                                        outputTensorInfo.GetShape());
}
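// The three fixed-shape tests above exercise both concat strategies: when the
// workload factory reports SupportsSubTensors(), the input handles are created
// as sub-tensor views directly into the output buffer, so each input can be
// written in place and the concat itself needs no extra copy; otherwise the
// inputs get their own buffers and the Concat workload performs the copies.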
LayerTestResult<uint8_t, 1> Concat1dUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat1dTestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 2> Concat2dDim0Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 2> Concat2dDim1Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim1TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 2> Concat2dDim0DiffInputDimsUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim0DiffInputDimsTestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 2> Concat2dDim1DiffInputDimsUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim1DiffInputDimsTestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 3> Concat3dDim0Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 3> Concat3dDim1Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim1TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}
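// These thin wrappers only pin down the data type (QAsymmU8) and the shared
// quantization parameters; backends register them with their unit test suites.
// A minimal usage sketch, assuming the usual Arm NN backend-test macro (the
// macro and registration site are not part of this file and are shown for
// illustration only):
//
//     ARMNN_AUTO_TEST_CASE(Concat2dDim0Uint8, Concat2dDim0Uint8Test)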
LayerTestResult<uint8_t, 3> Concat3dDim2Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat3dDim2TestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.5f, -1);
}

LayerTestResult<uint8_t, 3> Concat3dDim0DiffInputDimsUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 3> Concat3dDim1DiffInputDimsUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim1DiffInputDimsTestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 3> Concat3dDim2DiffInputDimsUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat3dDim2DiffInputDimsTestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDim0Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDim1Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDim1TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDim2Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDim2TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDim3Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat4dDim3TestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1, useSubtensor);
}

LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim0Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDiffShapeDim0TestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim1Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDiffShapeDim1TestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim2Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDiffShapeDim2TestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}
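// All QAsymmU8 wrappers above pass the same quantization parameters,
// scale = 0.5f and offset = -1, to the templated implementations. Under the
// usual asymmetric quantization rule, real = scale * (q - offset), so e.g. a
// stored value of 3 represents 0.5f * (3 - (-1)) = 2.0f. As with the fixed
// tests above, the exact values are arbitrary because Concat copies the
// quantized data verbatim.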
LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim3Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat4dDiffShapeDim3TestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1, useSubtensor);
}