From 3537c2ca7ebf31c1673b9ec2bb0c17b0406bbae0 Mon Sep 17 00:00:00 2001 From: surmeh01 Date: Fri, 18 May 2018 16:31:43 +0100 Subject: Release 18.05 --- src/armnn/backends/test/LayerTests.cpp | 777 ++++++++++++++++++++++----------- 1 file changed, 514 insertions(+), 263 deletions(-) (limited to 'src/armnn/backends/test/LayerTests.cpp') diff --git a/src/armnn/backends/test/LayerTests.cpp b/src/armnn/backends/test/LayerTests.cpp index 9eed2dbf78..a10e4bd7a0 100644 --- a/src/armnn/backends/test/LayerTests.cpp +++ b/src/armnn/backends/test/LayerTests.cpp @@ -6,8 +6,10 @@ #include "test/TensorHelpers.hpp" #include "TensorCopyUtils.hpp" +#include "Permute.hpp" #include +#include #include "armnn/LayerSupport.hpp" @@ -342,11 +344,11 @@ LayerTestResult SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWor std::vector myVec(outputDesc.GetNumElements(), 0); boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( QuantizedVector(qScale, qOffset, { - -4723, -7044, -9324, -6253, -3542, -7140, -10580, -13940, -9300, -5230, -9590, -14120, -18520, -12290, -6860, -9980, -14560, -18960, -12560, -7000, -7518, -10904, -14144, -9318, -5152, + -5032, -7256, -9376, -6142, -3368, }))); return SimpleConvolution2dTestImpl(workloadFactory, @@ -357,9 +359,79 @@ LayerTestResult SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWor qScale, qOffset, 1, // padding left - 2, // padding top + 1, // padding top 2, // padding right - 1); // padding bottom + 2); // padding bottom +} + +template +LayerTestResult DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + // Use a single-batch 2-channel 5x5 image as input + armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType()); + auto input = MakeTensor(inputTensorInfo, std::vector( + QuantizedVector(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { + 0, 1, 2, 3, 4, + 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, + + 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49 + }))); + + // Use a depth multiplier of 1 on a 2-channel 4x4 kernel + armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, armnn::GetDataType()); + auto kernel = MakeTensor(kernelTensorInfo, std::vector( + QuantizedVector(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), { + 32, 31, 30, 29, + 28, 27, 26, 25, + 24, 23, 22, 21, + 20, 19, 18, 17, + + 16, 15, 14, 13, + 12, 11, 10, 9, + 8, 7, 6, 5, + 4, 3, 2, 1 + }))); + + // Expected output is 1 batch of a 2-channel 5x5 image + // calculated using the python tensorflow library with strideX=1, strideY=1 + armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType()); + boost::multi_array expectedOutput = MakeTensor(outputTensorInfo, std::vector( + QuantizedVector(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), { + 1062, 1580, 1850, 1530, 1117, + 2140, 3108, 3500, 2842, 2042, + 3580, 5068, 5460, 4342, 3062, + 3618, 5072, 5390, 4248, 2971, + 3074, 4282, 4510, 3533, 2457, + 1550, 2284, 2362, 1955, 1428, + 2910, 4206, 4342, 3528, 2536, + 3390, 4886, 5022, 4068, 2916, + 3566, 5056, 5182, 4133, 2922, + 3100, 4352, 4452, 3517, 2465 + }))); + + return DepthwiseConvolution2dAsymmetricTestImpl(workloadFactory, + input, + kernel, + GetBias2::Type>(biasEnabled, qScale, qOffset), + expectedOutput, + qScale, + qOffset, + 1, // padding left + 1, // padding top + 2, // padding right + 2, // padding bottom + 1, // strideX + 1); // strideY } LayerTestResult @@ -385,6 +457,12 @@ LayerTestResult DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFa return DepthwiseConvolution2dDepthMul1TestImpl(workloadFactory, 0.0f, 0, biasEnabled); } +LayerTestResult DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dAsymmetricTestCommon(workloadFactory, 0.0f, 0, biasEnabled); +} + LayerTestResult DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) { @@ -493,138 +571,85 @@ LayerTestResult CopyViaSplitterUint8Test(armnn::IWorkloadFactory& wo LayerTestResult MergerTest(armnn::IWorkloadFactory& workloadFactory) { - unsigned int outputWidth = 5; + unsigned int outputWidth = 3; unsigned int outputHeight = 6; unsigned int outputChannels = 3; - unsigned int inputWidth1 = 2; - unsigned int inputHeight1 = 2; - unsigned int inputChannels1 = 3; - - unsigned int inputWidth2 = 2; - unsigned int inputHeight2 = 4; - unsigned int inputChannels2 = 3; - - unsigned int inputWidth3 = 3; - unsigned int inputHeight3 = 6; - unsigned int inputChannels3 = 2; + unsigned int inputWidth1 = 3; + unsigned int inputHeight1 = 6; + unsigned int inputChannels1 = 2; - unsigned int inputWidth4 = 3; - unsigned int inputHeight4 = 6; - unsigned int inputChannels4 = 1; + unsigned int inputWidth2 = 3; + unsigned int inputHeight2 = 6; + unsigned int inputChannels2 = 1; // Define the tensor descriptors armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32); armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32); armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32); - armnn::TensorInfo inputTensorInfo3({ inputChannels3, inputHeight3, inputWidth3 }, armnn::DataType::Float32); - armnn::TensorInfo inputTensorInfo4({ inputChannels4, inputHeight4, inputWidth4 }, armnn::DataType::Float32); LayerTestResult ret(outputTensorInfo); - ret.outputExpected = MakeTensor(outputTensorInfo, std::vector( - { - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, - 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, - 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, - 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, - 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, - - 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, - 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, - 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, - 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, - 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, - 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, - - 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, - 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, - 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, - 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, - 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, - 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, + { + 1.0f, 2.0f, 3.0f, + 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f, + 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, + + 19.0f, 20.0f, 21.0f, + 22.0f, 23.0f, 24.0f, + 25.0f, 26.0f, 27.0f, + 28.0f, 29.0f, 30.0f, + 31.0f, 32.0f, 33.0f, + 34.0f, 35.0f, 36.0f, + 37.0f, 38.0f, 39.0f, + 40.0f, 41.0f, 42.0f, + 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, + 49.0f, 50.0f, 51.0f, + 52.0f, 53.0f, 54.0f, }) ); - auto input1 = MakeTensor(inputTensorInfo1, std::vector( { - 1.0f, 2.0f, - 6.0f, 7.0f, - - 31.0f, 32.0f, - 36.0f, 37.0f, + 1.0f, 2.0f, 3.0f, + 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f, + 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, - 61.0f, 62.0f, - 66.0f, 67.0f, + 19.0f, 20.0f, 21.0f, + 22.0f, 23.0f, 24.0f, + 25.0f, 26.0f, 27.0f, + 28.0f, 29.0f, 30.0f, + 31.0f, 32.0f, 33.0f, + 34.0f, 35.0f, 36.0f, }) ); auto input2 = MakeTensor(inputTensorInfo2, std::vector( { - 11.0f, 12.0f, - 16.0f, 17.0f, - 21.0f, 22.0f, - 26.0f, 27.0f, - - 41.0f, 42.0f, - 46.0f, 47.0f, - 51.0f, 52.0f, - 56.0f, 57.0f, - - 71.0f, 72.0f, - 76.0f, 77.0f, - 81.0f, 82.0f, - 86.0f, 87.0f, - }) - ); - - auto input3 = MakeTensor(inputTensorInfo3, std::vector( - { - 3.0f, 4.0f, 5.0f, - 8.0f, 9.0f, 10.0f, - 13.0f, 14.0f, 15.0f, - 18.0f, 19.0f, 20.0f, - 23.0f, 24.0f, 25.0f, - 28.0f, 29.0f, 30.0f, - - 33.0f, 34.0f, 35.0f, - 38.0f, 39.0f, 40.0f, + 37.0f, 38.0f, 39.0f, + 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, - 48.0f, 49.0f, 50.0f, - 53.0f, 54.0f, 55.0f, - 58.0f, 59.0f, 60.0f, - }) - ); - - - auto input4 = MakeTensor(inputTensorInfo4, std::vector( - { - 63.0f, 64.0f, 65.0f, - 68.0f, 69.0f, 70.0f, - 73.0f, 74.0f, 75.0f, - 78.0f, 79.0f, 80.0f, - 83.0f, 84.0f, 85.0f, - 88.0f, 89.0f, 90.0f, + 46.0f, 47.0f, 48.0f, + 49.0f, 50.0f, 51.0f, + 52.0f, 53.0f, 54.0f, }) ); std::vector wOrigin1 = {0, 0, 0}; //extent of the window is defined by size of input[0] armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1); - std::vector wOrigin2 = {0, 2, 0}; //extent of the window is defined by size of input[1] + std::vector wOrigin2 = {2, 0, 0}; //extent of the window is defined by size of input[1] armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); - std::vector wOrigin3 = {0, 0, 2}; //extent of the window is defined by size of input[2] - armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3); - - std::vector wOrigin4 = {2, 0, 2}; //extent of the window is defined by size of input[3] - armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4); - - std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); bool subTensorsSupported = workloadFactory.SupportsSubTensors(); @@ -639,43 +664,25 @@ LayerTestResult MergerTest(armnn::IWorkloadFactory& workloadFactory) workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) : workloadFactory.CreateTensorHandle(inputTensorInfo2); - std::unique_ptr inputHandle3 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo3.GetShape(), wOrigin3.data()) : - workloadFactory.CreateTensorHandle(inputTensorInfo3); - - std::unique_ptr inputHandle4 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo4.GetShape(), wOrigin4.data()) : - workloadFactory.CreateTensorHandle(inputTensorInfo4); - - armnn::MergerQueueDescriptor data; armnn::WorkloadInfo info; AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); - AddInputToWorkload(data, info, inputTensorInfo3, inputHandle3.get()); - AddInputToWorkload(data, info, inputTensorInfo4, inputHandle4.get()); AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); data.m_ViewOrigins.push_back(window1); data.m_ViewOrigins.push_back(window2); - data.m_ViewOrigins.push_back(window3); - data.m_ViewOrigins.push_back(window4); std::unique_ptr workload = workloadFactory.CreateMerger(data, info); inputHandle1->Allocate(); inputHandle2->Allocate(); - inputHandle3->Allocate(); - inputHandle4->Allocate(); outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]); CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]); - CopyDataToITensorHandle(inputHandle3.get(), &input3[0][0][0]); - CopyDataToITensorHandle(inputHandle4.get(), &input4[0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); @@ -765,6 +772,7 @@ LayerTestResult AdditionTest(armnn::IWorkloadFactory& workloadFactory) CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -841,6 +849,7 @@ LayerTestResult AdditionBroadcastTestImpl(armnn::IWorkloadFactory& workloa CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -912,6 +921,7 @@ LayerTestResult AdditionBroadcast1ElementTestImpl(armnn::IWorkloadFactory& CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -996,7 +1006,9 @@ LayerTestResult CompareAdditionTest(armnn::IWorkloadFactory& workloadFa CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]); CopyDataToITensorHandle(inputHandle2Ref.get(), &input2[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); + refWorkloadFactory.Finalize(); workloadRef->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -1043,6 +1055,7 @@ LayerTestResult MultiplicationTestHelper(armnn::IWorkloadFactory& workl CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -1185,7 +1198,9 @@ LayerTestResult CompareMultiplicationTest(armnn::IWorkloadFactory& work CopyDataToITensorHandle(inputHandle0Ref.get(), &input0[0][0][0][0]); CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); + refWorkloadFactory.Finalize(); workloadRef->Execute(); CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get()); @@ -1264,7 +1279,9 @@ LayerTestResult CompareBatchNormTest(armnn::IWorkloadFactory& workloadF CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); + refWorkloadFactory.Finalize(); workloadRef->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -1273,23 +1290,299 @@ LayerTestResult CompareBatchNormTest(armnn::IWorkloadFactory& workloadF return ret; } -void Concatenate(armnn::IWorkloadFactory& workloadFactory, - std::initializer_list inputTensorInfos, - std::initializer_list inputs, - const armnn::TensorInfo& outputTensorInfo, - void* output, - unsigned int concatDim) -{ - armnn::MergerQueueDescriptor queueDescriptor; +template +void PermuteTensorData( + armnn::IWorkloadFactory& workloadFactory, + const armnn::PermutationVector& mappings, + armnn::TensorInfo & inputTensorInfo, + const T * inputData, + std::vector& outputData) +{ + BOOST_ASSERT_MSG(inputData != nullptr, "inputData must not be null"); + if (inputData == nullptr) + { + // Nullptr is an error in the test. By returning without doing the concatenation + // I expect the caller to fail the test. It still makes sense to report this as + // an assert for Debug builds. + return; + } + + armnn::TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::PermuteQueueDescriptor queueDescriptor; + queueDescriptor.m_Parameters = armnn::PermuteDescriptor{mappings}; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreatePermute(queueDescriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), inputData); + + workload->Execute(); + + outputData.resize(outputTensorInfo.GetNumElements()); + CopyDataFromITensorHandle(&outputData[0], outputHandle.get()); + inputTensorInfo = outputTensorInfo; +} +armnn::OriginsDescriptor CreateMergerDescriptorForConcatenation( + const std::vector & inputTensorInfos, + unsigned int concatDim) +{ std::vector shapes; shapes.reserve(inputTensorInfos.size()); for (const armnn::TensorInfo& it: inputTensorInfos) { shapes.push_back(it.GetShape()); } - armnn::OriginsDescriptor viewsDescriptor = armnn::CreateMergerDescriptorForConcatenation(shapes.begin(), - shapes.end(), concatDim); + + return armnn::CreateMergerDescriptorForConcatenation(shapes.begin(), + shapes.end(), + concatDim); +} + +// +// Concatenation is only supported for N and C dimensions for NCHW. In case of +// <4 dimensions we need to make sure that the concat dimensions is at least +// the 3rd slowest iterating one. +// + +bool NeedPermuteForConcat( + const std::vector & inputTensorInfos, + unsigned int concatDim) +{ + // See note above. Additionally we expect the input shapes to have the + // same number of dimensions. + unsigned int nDimensions = 0; + + // determine the number of dimensions as well as sanity check them + // agains test implementation issues + for (auto && tensorInfo : inputTensorInfos) + { + if (!nDimensions) + { + nDimensions = tensorInfo.GetShape().GetNumDimensions(); + } + else + { + BOOST_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(), + "Input shapes must have the same number of dimensions"); + } + } + + return (nDimensions-concatDim) < 3; +} + +armnn::TensorShape ExpandTensorShapeTo3dForPermute(const armnn::TensorShape & inputShape) +{ + unsigned int numDims = inputShape.GetNumDimensions(); + if (numDims >= 3) + { + // Nothing to do if the inputShape has at least 3 dimensions. + return inputShape; + } + + std::vector newDims(size_t(3), 1u); + unsigned int expandedBy = 3 - numDims; + for (unsigned int i=0; i & permutations) +{ + BOOST_ASSERT_MSG(numDimensions <= 3, + "Only dimensions 1,2 and 3 are supported by this helper"); + + unsigned int expandedBy = 3 - numDimensions; + unsigned int expandedConcatAxis = concatDim + expandedBy; + + if (expandedConcatAxis == 2) + { + concatDim = 0; + armnn::PermutationVector forwardPermutation({1, 2, 0}); + armnn::PermutationVector reversePermutation({2, 0, 1}); + permutations = std::make_pair(forwardPermutation, reversePermutation); + } + else if (expandedConcatAxis == 1) + { + concatDim = 0; + armnn::PermutationVector forwardPermutation({2, 0, 1}); + armnn::PermutationVector reversePermutation({1, 2, 0}); + permutations = std::make_pair(forwardPermutation, reversePermutation); + } + else + { + BOOST_ASSERT(expandedConcatAxis == 0); + concatDim = 0; + } +} + +// +// Permute the input tensors so we can do a supported concatenation. +// Also treat lower than 3d tensors as 3d by adding dummy 1 dimensions +// at the front. Finally this function tells what the output shape +// of the permuted concatenated tensor is going to be. +// +template +void PermuteInputsForConcat( + armnn::IWorkloadFactory& workloadFactory, + std::vector & inputTensorInfos, + std::vector & inputData, + std::vector> & inputDataStorage, + armnn::PermutationVector & permuteVector, + unsigned int & concatDim, + armnn::TensorInfo & outputTensorInfo) +{ + BOOST_ASSERT_MSG(inputTensorInfos.size() > 1, + "Expecting more than one tensor to be concatenated here"); + + unsigned int numDims = 0; + unsigned int nthInput = 0; + const armnn::PermutationVector identity({0, 1, 2}); + + std::pair permutations = + std::make_pair(identity, identity); + + inputDataStorage.resize(inputData.size()); + + for (auto && tensorInfo : inputTensorInfos) + { + if (numDims == 0) + { + numDims = tensorInfo.GetShape().GetNumDimensions(); + Generate3dPermuteVectorForConcat(numDims, concatDim, permutations); + // store the reverese permutation + permuteVector = permutations.second; + BOOST_ASSERT_MSG(!permuteVector.IsEqual(identity), + "Test logic error, we don't need permutation, so we shouldn't arrive here"); + } + else + { + BOOST_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(), + "All inputs must have the same number of dimensions"); + } + + armnn::TensorInfo newTensorInfo = tensorInfo; + newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape())); + + PermuteTensorData(workloadFactory, + permutations.first, + newTensorInfo, + inputData[nthInput], + inputDataStorage[nthInput]); + + inputData[nthInput] = inputDataStorage[nthInput].data(); + inputTensorInfos[nthInput] = newTensorInfo; + + ++nthInput; + } + + outputTensorInfo.SetShape( + armnnUtils::Permuted( + ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()), + permutations.first)); +} + + +// +// This is the pair of PermuteInputsForConcat(...) which permutes back +// the output of the concatenation so we can check against an expected +// output. +// +template +void PermuteOutputForConcat( + armnn::IWorkloadFactory& workloadFactory, + const armnn::TensorInfo & tensorInfo, + const armnn::PermutationVector & permuteVector, + std::unique_ptr && inputDataHandle, + T * data) +{ + BOOST_ASSERT_MSG(data != nullptr, "data must not be null"); + if (data == nullptr) + { + // Nullptr is an error in the test. By returning without doing the permutation + // I expect the caller to fail the test. It still makes sense to report this as + // an assert for Debug builds. + return; + } + + armnn::TensorInfo resultTensorInfo = tensorInfo; + std::vector inputData(tensorInfo.GetNumElements()); + std::vector outputData; + + CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get()); + + PermuteTensorData(workloadFactory, + permuteVector, + resultTensorInfo, + &inputData[0], + outputData); + + ::memcpy(data, &outputData[0], sizeof(T)*outputData.size()); +} + +template +void Concatenate(armnn::IWorkloadFactory& workloadFactory, + std::initializer_list inputTensorInfosOrig, + std::initializer_list inputsOrig, + const armnn::TensorInfo& outputTensorInfoOrig, + T * output, + unsigned int concatDim) +{ + BOOST_ASSERT_MSG(output != nullptr, "output must not be null"); + if (output == nullptr) + { + // Nullptr is an error in the test. By returning without doing the permutation + // I expect the caller to fail the test. It still makes sense to report this as + // an assert for Debug builds. + return; + } + + armnn::MergerQueueDescriptor queueDescriptor; + + // save a copy of the parameters which we might need to change + std::vector inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end()); + std::vector inputs = inputsOrig; + armnn::TensorInfo outputTensorInfo = outputTensorInfoOrig; + + armnn::PermutationVector permuteVector{0, 1, 2}; + + // hold and automatically release memory for the reshaped input data + std::vector> tmpInputDataStorage; + + const size_t inputCount = inputTensorInfos.size(); + + bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim); + + if (needPermuteForConcat) + { + // + // We need to permute the inputs, because concatenation along + // the requested axis is not supported + // + PermuteInputsForConcat(workloadFactory, + inputTensorInfos, + inputs, + tmpInputDataStorage, + permuteVector, + concatDim, + outputTensorInfo); + } + + armnn::OriginsDescriptor viewsDescriptor = CreateMergerDescriptorForConcatenation(inputTensorInfos, concatDim); queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews()); for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i) @@ -1298,8 +1591,6 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory, viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions())); } - const size_t inputCount = inputTensorInfos.size(); - std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); std::vector> inputHandles; @@ -1308,7 +1599,7 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory, const bool subTensorsSupported = workloadFactory.SupportsSubTensors(); for (unsigned int i = 0; i < inputCount; ++i) { - const armnn::TensorInfo& inputTensorInfo = inputTensorInfos.begin()[i]; + const armnn::TensorInfo& inputTensorInfo = inputTensorInfos[i]; std::unique_ptr inputHandle = subTensorsSupported ? workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo.GetShape(), @@ -1322,7 +1613,7 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory, for (unsigned int i = 0; i < inputCount; ++i) { - AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos.begin()[i], inputHandles[i].get()); + AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get()); } AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); @@ -1339,12 +1630,25 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory, unsigned int nextInputId = 0; for (auto& inputHandle : inputHandles) { - CopyDataToITensorHandle(inputHandle.get(), *(inputs.begin() + nextInputId++)); + CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]); + ++nextInputId; } + workloadFactory.Finalize(); workload->Execute(); - CopyDataFromITensorHandle(output, outputHandle.get()); + if (needPermuteForConcat) + { + PermuteOutputForConcat(workloadFactory, + outputTensorInfo, + permuteVector, + std::move(outputHandle), + output); + } + else + { + CopyDataFromITensorHandle(output, outputHandle.get()); + } } template @@ -1362,7 +1666,7 @@ LayerTestResult Concatenation1dTestImpl(armnn::IWorkloadFactory& workloadF std::vector output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate(workloadFactory, { inputTensorInfo, inputTensorInfo, inputTensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -1419,7 +1723,7 @@ LayerTestResult Concatenation2dTestImpl(armnn::IWorkloadFactory& workloadF std::vector output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate(workloadFactory, { inputTensorInfo, inputTensorInfo, inputTensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -1524,7 +1828,7 @@ LayerTestResult Concatenation2dDim0DiffInputDimsTestImpl(armnn::IWorkloadF std::vector output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate(workloadFactory, { input0TensorInfo, input1TensorInfo, input2TensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -1596,7 +1900,7 @@ LayerTestResult Concatenation2dDim1DiffInputDimsTestImpl(armnn::IWorkloadF std::vector output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate(workloadFactory, { input0TensorInfo, input1TensorInfo, input2TensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -1693,7 +1997,7 @@ LayerTestResult Concatenation3dTestImpl(armnn::IWorkloadFactory& workloadF std::vector output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate(workloadFactory, { inputTensorInfo, inputTensorInfo, inputTensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -1953,7 +2257,7 @@ LayerTestResult Concatenation3dDim0DiffInputDimsTestImpl(armnn::IWorkloadF std::vector output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate(workloadFactory, { input0TensorInfo, input1TensorInfo, input2TensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -2091,7 +2395,7 @@ LayerTestResult Concatenation3dDim1DiffInputDimsTestImpl(armnn::IWorkloadF std::vector output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate(workloadFactory, { input0TensorInfo, input1TensorInfo, input2TensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -2229,7 +2533,7 @@ LayerTestResult Concatenation3dDim2DiffInputDimsTestImpl(armnn::IWorkloadF std::vector output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate(workloadFactory, { input0TensorInfo, input1TensorInfo, input2TensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -2306,6 +2610,7 @@ LayerTestResult ResizeBilinearNopTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2358,6 +2663,7 @@ LayerTestResult SimpleResizeBilinearTest(armnn::IWorkloadFactory& work outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2408,6 +2714,7 @@ LayerTestResult ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workl outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2457,6 +2764,7 @@ LayerTestResult ResizeBilinearMinTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2507,6 +2815,7 @@ LayerTestResult ResizeBilinearMagTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2555,6 +2864,7 @@ LayerTestResult FakeQuantizationTest(armnn::IWorkloadFactory& workload CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); @@ -2617,6 +2927,7 @@ LayerTestResult L2Normalization1dTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2686,6 +2997,7 @@ LayerTestResult L2Normalization2dTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2766,6 +3078,7 @@ LayerTestResult L2Normalization3dTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2929,6 +3242,7 @@ LayerTestResult L2Normalization4dTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3022,6 +3336,7 @@ LayerTestResult ConstantTestImpl(armnn::IWorkloadFactory& workloadFactory, outputHandle->Allocate(); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3040,32 +3355,22 @@ LayerTestResult ConstantTestUint8(armnn::IWorkloadFactory& workloadF LayerTestResult MergerUint8Test(armnn::IWorkloadFactory& workloadFactory) { - unsigned int outputWidth = 5; + unsigned int outputWidth = 3; unsigned int outputHeight = 6; unsigned int outputChannels = 3; - unsigned int inputWidth1 = 2; - unsigned int inputHeight1 = 2; - unsigned int inputChannels1 = 3; + unsigned int inputWidth1 = 3; + unsigned int inputHeight1 = 6; + unsigned int inputChannels1 = 2; - unsigned int inputWidth2 = 2; - unsigned int inputHeight2 = 4; - unsigned int inputChannels2 = 3; - - unsigned int inputWidth3 = 3; - unsigned int inputHeight3 = 6; - unsigned int inputChannels3 = 2; - - unsigned int inputWidth4 = 3; - unsigned int inputHeight4 = 6; - unsigned int inputChannels4 = 1; + unsigned int inputWidth2 = 3; + unsigned int inputHeight2 = 6; + unsigned int inputChannels2 = 1; // Define the tensor descriptors armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8); armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8); armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8); - armnn::TensorInfo inputTensorInfo3({ inputChannels3, inputHeight3, inputWidth3 }, armnn::DataType::QuantisedAsymm8); - armnn::TensorInfo inputTensorInfo4({ inputChannels4, inputHeight4, inputWidth4 }, armnn::DataType::QuantisedAsymm8); // Arbitrary scale and offsets. They don't really matter as the merger operator doesn't dequantize/quantize const float scale = 0.13497836f; @@ -3077,113 +3382,69 @@ LayerTestResult MergerUint8Test(armnn::IWorkloadFactory& workloadFac inputTensorInfo1.SetQuantizationOffset(offset); inputTensorInfo2.SetQuantizationScale(scale); inputTensorInfo2.SetQuantizationOffset(offset); - inputTensorInfo3.SetQuantizationScale(scale); - inputTensorInfo3.SetQuantizationOffset(offset); - inputTensorInfo4.SetQuantizationScale(scale); - inputTensorInfo4.SetQuantizationOffset(offset); LayerTestResult ret(outputTensorInfo); ret.outputExpected = MakeTensor(outputTensorInfo, std::vector( - { - 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, - - 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, - 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, - - 61, 62, 63, 64, 65, - 66, 67, 68, 69, 70, - 71, 72, 73, 74, 75, - 76, 77, 78, 79, 80, - 81, 82, 83, 84, 85, - 86, 87, 88, 89, 90, - }) + { + 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 10, 11, 12, + 13, 14, 15, + 16, 17, 18, + + 19, 20, 21, + 22, 23, 24, + 25, 26, 27, + 28, 29, 30, + 31, 32, 33, + 34, 35, 36, + + 37, 38, 39, + 40, 41, 42, + 43, 44, 45, + 46, 47, 48, + 49, 50, 51, + 52, 53, 54, + }) ); - auto input1 = MakeTensor(inputTensorInfo1, std::vector( { - 1, 2, - 6, 7, - - 31, 32, - 36, 37, + 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 10, 11, 12, + 13, 14, 15, + 16, 17, 18, - 61, 62, - 66, 67, + 19, 20, 21, + 22, 23, 24, + 25, 26, 27, + 28, 29, 30, + 31, 32, 33, + 34, 35, 36, }) ); auto input2 = MakeTensor(inputTensorInfo2, std::vector( { - 11, 12, - 16, 17, - 21, 22, - 26, 27, - - 41, 42, - 46, 47, - 51, 52, - 56, 57, - - 71, 72, - 76, 77, - 81, 82, - 86, 87, - }) - ); - - auto input3 = MakeTensor(inputTensorInfo3, std::vector( - { - 3, 4, 5, - 8, 9, 10, - 13, 14, 15, - 18, 19, 20, - 23, 24, 25, - 28, 29, 30, - - 33, 34, 35, - 38, 39, 40, + 37, 38, 39, + 40, 41, 42, 43, 44, 45, - 48, 49, 50, - 53, 54, 55, - 58, 59, 60, - }) - ); - - - auto input4 = MakeTensor(inputTensorInfo4, std::vector( - { - 63, 64, 65, - 68, 69, 70, - 73, 74, 75, - 78, 79, 80, - 83, 84, 85, - 88, 89, 90, + 46, 47, 48, + 49, 50, 51, + 52, 53, 54, }) ); std::vector wOrigin1 = { 0, 0, 0 }; //extent of the window is defined by size of input[0] armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1); - std::vector wOrigin2 = { 0, 2, 0 }; //extent of the window is defined by size of input[1] + std::vector wOrigin2 = { 2, 0, 0 }; //extent of the window is defined by size of input[1] armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); - std::vector wOrigin3 = { 0, 0, 2 }; //extent of the window is defined by size of input[2] - armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3); - - std::vector wOrigin4 = { 2, 0, 2 }; //extent of the window is defined by size of input[3] - armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4); - std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); @@ -3199,43 +3460,26 @@ LayerTestResult MergerUint8Test(armnn::IWorkloadFactory& workloadFac workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) : workloadFactory.CreateTensorHandle(inputTensorInfo2); - std::unique_ptr inputHandle3 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo3.GetShape(), wOrigin3.data()) : - workloadFactory.CreateTensorHandle(inputTensorInfo3); - - std::unique_ptr inputHandle4 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo4.GetShape(), wOrigin4.data()) : - workloadFactory.CreateTensorHandle(inputTensorInfo4); - armnn::MergerQueueDescriptor data; armnn::WorkloadInfo info; AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); - AddInputToWorkload(data, info, inputTensorInfo3, inputHandle3.get()); - AddInputToWorkload(data, info, inputTensorInfo4, inputHandle4.get()); AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); data.m_ViewOrigins.push_back(window1); data.m_ViewOrigins.push_back(window2); - data.m_ViewOrigins.push_back(window3); - data.m_ViewOrigins.push_back(window4); std::unique_ptr workload = workloadFactory.CreateMerger(data, info); inputHandle1->Allocate(); inputHandle2->Allocate(); - inputHandle3->Allocate(); - inputHandle4->Allocate(); outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]); CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]); - CopyDataToITensorHandle(inputHandle3.get(), &input3[0][0][0]); - CopyDataToITensorHandle(inputHandle4.get(), &input4[0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); @@ -3310,6 +3554,7 @@ LayerTestResult AdditionUint8Test(armnn::IWorkloadFactory& workloadF CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3371,6 +3616,7 @@ LayerTestResult MultiplicationUint8TestHelper(armnn::IWorkloadFactor CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3531,6 +3777,7 @@ LayerTestResult ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3588,6 +3835,7 @@ LayerTestResult SimpleResizeBilinearUint8Test(armnn::IWorkloadFactor outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3643,6 +3891,7 @@ LayerTestResult ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3696,6 +3945,7 @@ LayerTestResult ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3751,6 +4001,7 @@ LayerTestResult ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); -- cgit v1.2.1