// // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // #include "WorkloadUtils.hpp" namespace armnn { armnn::ConstTensor PermuteTensor(const ConstCpuTensorHandle* tensor, const PermutationVector& permutationVector, void* permuteBuffer) { BOOST_ASSERT_MSG(tensor, "Invalid input tensor"); BOOST_ASSERT_MSG(permuteBuffer, "Invalid permute buffer"); TensorInfo tensorInfo = tensor->GetTensorInfo(); if (permutationVector.GetSize() > 0) { tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector); armnnUtils::Permute(tensorInfo.GetShape(), permutationVector, tensor->GetConstTensor(), permuteBuffer, GetDataTypeSize(tensorInfo.GetDataType())); } else { ::memcpy(permuteBuffer, tensor->GetConstTensor(), tensorInfo.GetNumBytes()); } return ConstTensor(tensorInfo, permuteBuffer); } void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout) { // Reshape the weights in-place const TensorShape& weightShape = weightInfo.GetShape(); switch (dataLayout) { case DataLayout::NHWC: // The data layout is NHWC, reshape from [ H, W, I, M ] to [ 1, H, W, I * M ] weightInfo.SetShape({ 1, weightShape[0], weightShape[1], weightShape[2] * weightShape[3] }); weightInfo.SetShape({ 1, weightShape[0] * weightShape[1], weightShape[2], weightShape[3] }); break; case DataLayout::NCHW: default: // The data layout is NCHW, reshape from [ M, I, H, W ] to [ 1, I * M, H, W, ] weightInfo.SetShape({ 1, weightShape[0] * weightShape[1], weightShape[2], weightShape[3] }); break; } } template ConstTensor ReorderWeightChannelsForAcl(const ConstTensor& weightHandle, DataLayout dataLayout, void* permuteBuffer) { DataType* weight = static_cast(permuteBuffer); const TensorShape& weightShape = weightHandle.GetShape(); unsigned int multiplier; unsigned int height; unsigned int width; unsigned int inputChannels; switch (dataLayout) { case DataLayout::NHWC: //It actually is [ H, W, I, M ] height = weightShape[0]; width = weightShape[1]; inputChannels = weightShape[2]; multiplier = weightShape[3]; break; case DataLayout::NCHW: //It actually is [ M, I, H, W ] default: height = weightShape[2]; width = weightShape[3]; inputChannels = weightShape[1]; multiplier = weightShape[0]; break; } DataType weightAclOrder[height*width*inputChannels*multiplier]; unsigned int destinationWeightsChannel; unsigned int totalChannels = inputChannels * multiplier; unsigned int channelSize = height * width; for (unsigned int originWeightsChannel = 0; originWeightsChannel < totalChannels; originWeightsChannel++) { if (originWeightsChannel % inputChannels == 0) { destinationWeightsChannel = originWeightsChannel / inputChannels; } else { destinationWeightsChannel = (originWeightsChannel - 1) / inputChannels + multiplier; } for (unsigned int i = 0; i < channelSize; i++) { weightAclOrder[i + destinationWeightsChannel * channelSize] = weight[i + originWeightsChannel * channelSize]; } } ::memcpy(permuteBuffer, weightAclOrder, weightHandle.GetInfo().GetNumBytes()); return ConstTensor(weightHandle.GetInfo(), permuteBuffer); } TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout) { // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library // 1. Permute the weights if necessary // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done // starting from the current shape of [ M, I, H, W ] TensorInfo weightPermutedInfo(weightInfo); if (dataLayout == DataLayout::NHWC) { // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ] PermutationVector permutationVector{ 3, 2, 0, 1 }; weightPermutedInfo = armnnUtils::Permuted(weightInfo, permutationVector); } // 2. Reshape the weights ReshapeWeightsForAcl(weightPermutedInfo, dataLayout); // 3. Return the permuted weight info return weightPermutedInfo; } armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstCpuTensorHandle* weightTensor, DataLayout dataLayout, void* permuteBuffer) { BOOST_ASSERT_MSG(weightTensor, "Invalid input tensor"); BOOST_ASSERT_MSG(permuteBuffer, "Invalid permute buffer"); auto multiplier = weightTensor->GetTensorInfo().GetShape()[0]; auto inputChannels = weightTensor->GetTensorInfo().GetShape()[1]; // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library // 1. Permute the weights if necessary // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done // starting from the current shape of [ M, I, H, W ] // If no permutation is necessary, leave the permutation vector empty PermutationVector permutationVector{}; if (dataLayout == DataLayout::NHWC) { // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ] permutationVector = { 3, 2, 0, 1 }; } ConstTensor weightPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer); // Shuffle the weights data to obtain the channel order needed used by Acl if (multiplier > 1 and inputChannels > 1 and dataLayout == DataLayout::NCHW) { switch (weightPermuted.GetDataType()) { case DataType::Float32: weightPermuted = ReorderWeightChannelsForAcl(weightPermuted, dataLayout, permuteBuffer); break; case DataType::Float16: weightPermuted = ReorderWeightChannelsForAcl(weightPermuted, dataLayout, permuteBuffer); break; case DataType::QuantisedAsymm8: weightPermuted = ReorderWeightChannelsForAcl(weightPermuted, dataLayout, permuteBuffer); break; default: break; } } // 2. Reshape the weights ReshapeWeightsForAcl(weightPermuted.GetInfo(), dataLayout); // 3. Return both the tensor and the allocated storage to ensure that the data stays alive return weightPermuted; } } // namespace armnn