From ae12306486efc55293a40048618abe5e8b19151b Mon Sep 17 00:00:00 2001 From: Matthew Sloyan Date: Fri, 7 May 2021 14:18:01 +0000 Subject: Revert "MLCE-418 Reduce layer does not support multiple axes" This reverts commit d905decd256558bbee165e636ce4242ac3b9c917. Reason for revert: LargeGraph_TENSOR_FLOAT32/FLOAT16 CTS tests failures Change-Id: Ie69826549e73775825f45134375b5b2c41aebd01 --- Android.mk | 1 - CMakeLists.txt | 1 - .../test/optimizations/ReduceMultipleAxesTests.cpp | 288 --------------------- src/backends/aclCommon/ArmComputeSubgraphUtils.hpp | 85 ------ src/backends/aclCommon/ArmComputeUtils.hpp | 55 ---- src/backends/cl/ClBackend.cpp | 24 +- src/backends/cl/workloads/ClReduceWorkload.cpp | 51 +--- src/backends/neon/NeonBackend.cpp | 24 +- src/backends/neon/workloads/NeonReduceWorkload.cpp | 53 +--- 9 files changed, 24 insertions(+), 558 deletions(-) delete mode 100644 src/armnn/test/optimizations/ReduceMultipleAxesTests.cpp diff --git a/Android.mk b/Android.mk index 168b32a400..d9230e5585 100644 --- a/Android.mk +++ b/Android.mk @@ -393,7 +393,6 @@ LOCAL_SRC_FILES := \ src/armnn/test/optimizations/OptimizeInversePermutesTests.cpp \ src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp \ src/armnn/test/optimizations/PermuteAsReshapeTests.cpp \ - src/armnn/test/optimizations/ReduceMultipleAxesTests.cpp \ src/armnn/test/optimizations/SquashEqualSiblingsTests.cpp \ src/armnn/test/optimizations/TransposeAsReshapeTests.cpp \ src/armnn/test/OptimizerTests.cpp \ diff --git a/CMakeLists.txt b/CMakeLists.txt index c15dcb0eda..51f08bade9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -566,7 +566,6 @@ if(BUILD_UNIT_TESTS) src/armnn/test/optimizations/OptimizeInversePermutesTests.cpp src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp src/armnn/test/optimizations/PermuteAsReshapeTests.cpp - src/armnn/test/optimizations/ReduceMultipleAxesTests.cpp src/armnn/test/optimizations/SquashEqualSiblingsTests.cpp src/armnn/test/optimizations/TransposeAsReshapeTests.cpp src/armnn/test/OptionalTest.cpp diff --git a/src/armnn/test/optimizations/ReduceMultipleAxesTests.cpp b/src/armnn/test/optimizations/ReduceMultipleAxesTests.cpp deleted file mode 100644 index 28c0ea1466..0000000000 --- a/src/armnn/test/optimizations/ReduceMultipleAxesTests.cpp +++ /dev/null @@ -1,288 +0,0 @@ -// -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "../GraphUtils.hpp" -#include "../TestUtils.hpp" - -#include - -#include - -using namespace armnn; - -BOOST_AUTO_TEST_SUITE(Optimizer) - -INetworkPtr CreateSimpleReduceNetwork(ReduceDescriptor reduceDescriptor, - TensorShape& inputShape, - TensorShape& outputShape) -{ - // Create a network - INetworkPtr network = INetwork::Create(); - - const std::string layerName("reduce_layer"); - const TensorInfo inputInfo (inputShape, DataType::Float32); - const TensorInfo outputInfo(outputShape, DataType::Float32); - - IConnectableLayer* const inputLayer = network->AddInputLayer(0); - IConnectableLayer* const reduceLayer = network->AddReduceLayer(reduceDescriptor, layerName.c_str()); - IConnectableLayer* const outputLayer = network->AddOutputLayer(0); - - inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); - reduceLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); - - inputLayer->GetOutputSlot(0).Connect(reduceLayer->GetInputSlot(0)); - reduceLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - - return network; -} - -void ReduceWithMultipleAxesTest(INetworkPtr& network, - const TensorShape& outputShape, - const std::vector& inputData, - const std::vector& expectedOutput, - const size_t numOfAxes, - Compute backendId) -{ - // Create ArmNN runtime - IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); - - // Optimise ArmNN network - IOptimizedNetworkPtr optNet = Optimize(*network, {backendId}, run->GetDeviceSpec()); - - Graph& graph = GetGraphForTesting(optNet.get()); - if (numOfAxes == 2) - { - BOOST_CHECK(graph.GetNumLayers() == 4); - BOOST_TEST(CheckSequence(graph.cbegin(), - graph.cend(), - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType)); - } - else - { - BOOST_CHECK(graph.GetNumLayers() == 5); - BOOST_TEST(CheckSequence(graph.cbegin(), - graph.cend(), - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType)); - } - - // Get last layer in new chain, layers name follow 0, 1, 2 pattern - std::string layerName = "reduce_layer_" + std::to_string(numOfAxes - 1); - Layer* const reduceLayer = GetFirstLayerWithName(graph, layerName); - BOOST_TEST(reduceLayer); - auto reduceTensorInfo = reduceLayer->GetOutputSlot().GetTensorInfo(); - - // Tensorshape and the data type are correct - BOOST_TEST((reduceTensorInfo.GetShape() == outputShape)); - BOOST_TEST((reduceTensorInfo.GetDataType() == DataType::Float32)); - - // Load network into runtime - NetworkId networkIdentifier; - run->LoadNetwork(networkIdentifier, std::move(optNet)); - - // Create input and output tensors - std::vector outputData(expectedOutput.size()); - InputTensors inputTensors - { - { 0, armnn::ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputData.data()) } - }; - OutputTensors outputTensors - { - { 0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData.data()) } - }; - - // Run inference - run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors); - - // Checks the results - BOOST_TEST(outputData == expectedOutput); -} - -void ReduceSumWithTwoAxesKeepDimsTest(Compute backendId) -{ - armnn::ReduceDescriptor reduceDescriptor; - reduceDescriptor.m_vAxis = { 1, 2 }; - reduceDescriptor.m_KeepDims = true; - reduceDescriptor.m_ReduceOperation = armnn::ReduceOperation::Sum; - - TensorShape inputShape = { 1, 3, 2, 4 }; - TensorShape outputShape = { 1, 1, 1, 4 }; - - // Construct ArmNN network - INetworkPtr network = CreateSimpleReduceNetwork(reduceDescriptor, inputShape, 
outputShape); - - // Creates structures for input & output. - const std::vector inputData({ 1.0f, 2.0f, 3.0f, 4.0f, - 5.0f, 6.0f, 7.0f, 8.0f, - - 10.0f, 20.0f, 30.0f, 40.0f, - 50.0f, 60.0f, 70.0f, 80.0f, - - 100.0f, 200.0f, 300.0f, 400.0f, - 500.0f, 600.0f, 700.0f, 800.0f }); - const std::vector expectedOutput({ 666.0f, 888.0f, 1110.0f, 1332.0f }); - - ReduceWithMultipleAxesTest(network, - outputShape, - inputData, - expectedOutput, - reduceDescriptor.m_vAxis.size(), - backendId); -} - -void ReduceSumWithTwoAxesTest(Compute backendId) -{ - armnn::ReduceDescriptor reduceDescriptor; - reduceDescriptor.m_vAxis = { 1, 2 }; - reduceDescriptor.m_KeepDims = false; - reduceDescriptor.m_ReduceOperation = armnn::ReduceOperation::Sum; - - TensorShape inputShape = { 1, 3, 2, 4 }; - TensorShape outputShape = { 1, 4 }; - - // Construct ArmNN network - INetworkPtr network = CreateSimpleReduceNetwork(reduceDescriptor, inputShape, outputShape); - - // Creates structures for input & output. - const std::vector inputData({ 1.0f, 2.0f, 3.0f, 4.0f, - 5.0f, 6.0f, 7.0f, 8.0f, - - 10.0f, 20.0f, 30.0f, 40.0f, - 50.0f, 60.0f, 70.0f, 80.0f, - - 100.0f, 200.0f, 300.0f, 400.0f, - 500.0f, 600.0f, 700.0f, 800.0f }); - const std::vector expectedOutput({ 666.0f, 888.0f, 1110.0f, 1332.0f }); - - ReduceWithMultipleAxesTest(network, - outputShape, - inputData, - expectedOutput, - reduceDescriptor.m_vAxis.size(), - backendId); -} - -void ReduceSumWithThreeAxesKeepDimsTest(Compute backendId) -{ - armnn::ReduceDescriptor reduceDescriptor; - reduceDescriptor.m_vAxis = { 0, 2, 3 }; - reduceDescriptor.m_KeepDims = true; - reduceDescriptor.m_ReduceOperation = armnn::ReduceOperation::Sum; - - TensorShape inputShape = { 2, 2, 2, 2 }; - TensorShape outputShape = { 1, 2, 1, 1 }; - - // Construct ArmNN network - INetworkPtr network = CreateSimpleReduceNetwork(reduceDescriptor, inputShape, outputShape); - - // Creates structures for input & output. - const std::vector inputData({ 1.0f, 2.0f, - 3.0f, 4.0f, - - 5.0f, 6.0f, - 7.0f, 8.0f, - - 10.0f, 20.0f, - 30.0f, 40.0f, - - 50.0f, 60.0f, - 70.0f, 80.0f }); - const std::vector expectedOutput({ 110.0f, 286.0f }); - - ReduceWithMultipleAxesTest(network, - outputShape, - inputData, - expectedOutput, - reduceDescriptor.m_vAxis.size(), - backendId); -} - -void ReduceSumWithThreeAxesTest(Compute backendId) -{ - armnn::ReduceDescriptor reduceDescriptor; - reduceDescriptor.m_vAxis = { 0, 2, 3 }; - reduceDescriptor.m_KeepDims = false; - reduceDescriptor.m_ReduceOperation = armnn::ReduceOperation::Sum; - - TensorShape inputShape = { 2, 2, 2, 2 }; - TensorShape outputShape = { 2 }; - - // Construct ArmNN network - INetworkPtr network = CreateSimpleReduceNetwork(reduceDescriptor, inputShape, outputShape); - - // Creates structures for input & output. 
- const std::vector inputData({ 1.0f, 2.0f, - 3.0f, 4.0f, - - 5.0f, 6.0f, - 7.0f, 8.0f, - - 10.0f, 20.0f, - 30.0f, 40.0f, - - 50.0f, 60.0f, - 70.0f, 80.0f }); - const std::vector expectedOutput({ 110.0f, 286.0f }); - - ReduceWithMultipleAxesTest(network, - outputShape, - inputData, - expectedOutput, - reduceDescriptor.m_vAxis.size(), - backendId); -} - -using namespace armnn; -#if defined(ARMCOMPUTENEON_ENABLED) -BOOST_AUTO_TEST_CASE(ReduceSumWithTwoAxesKeepDimsCpuAccTest) -{ - ReduceSumWithTwoAxesKeepDimsTest(Compute::CpuAcc); -} - -BOOST_AUTO_TEST_CASE(ReduceSumWithTwoAxesCpuAccTest) -{ - ReduceSumWithTwoAxesTest(Compute::CpuAcc); -} - -BOOST_AUTO_TEST_CASE(ReduceSumWithThreeAxesKeepDimsCpuAccTest) -{ - ReduceSumWithThreeAxesKeepDimsTest(Compute::CpuAcc); -} - -BOOST_AUTO_TEST_CASE(ReduceSumWithThreeAxesCpuAccTest) -{ - ReduceSumWithThreeAxesTest(Compute::CpuAcc); -} -#endif - -#if defined(ARMCOMPUTECL_ENABLED) -BOOST_AUTO_TEST_CASE(ReduceSumWithTwoAxesKeepDimsGpuAccTest) -{ - ReduceSumWithTwoAxesKeepDimsTest(Compute::GpuAcc); -} - -BOOST_AUTO_TEST_CASE(ReduceSumWithTwoAxesGpuAccTest) -{ - ReduceSumWithTwoAxesTest(Compute::GpuAcc); -} - -BOOST_AUTO_TEST_CASE(ReduceSumWithThreeAxesKeepDimsGpuAccTest) -{ - ReduceSumWithThreeAxesKeepDimsTest(Compute::GpuAcc); -} - -BOOST_AUTO_TEST_CASE(ReduceSumWithThreeAxesGpuAccTest) -{ - ReduceSumWithThreeAxesTest(Compute::GpuAcc); -} -#endif - -BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file diff --git a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp index 9439ddb61e..a0fca46330 100644 --- a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp +++ b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp @@ -6,9 +6,6 @@ #pragma once #include -#include - -#include namespace armnn { @@ -150,86 +147,4 @@ LayerType* FuseLayerWithWeightsAndBiases(OptimizationViews& optimizationViews, return replacementLayer; } -// -// If reduce layer has multiple axes, add new layer for each axis to simulate the same behaviour -// as currently only one axis is supported. -// -template -void ChainReduceLayers(OptimizationViews& optimizationViews, - LayerType* baseLayer, - ReduceDescriptor& reduceDescriptor) -{ - // If layer has single axis don't chain layers. - if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1) - { - // Save base layer output shape to compare against the output of the final layer added. - const TensorInfo baseLayerInfo = baseLayer->GetOutputSlot(0).GetTensorInfo(); - - // Vector of new chained layers, used for substitution. - std::vector layers; - - // Vector of axes so each layer is reshaped correctly. - std::vector reduceAxis; - unsigned int recalulateAxis = 0; - - for (unsigned int i = 0; i != reduceDescriptor.m_vAxis.size(); ++i) - { - // Get TensorInfo to populate subsequent layers with. - TensorInfo layerInfoToModify = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(); - - reduceAxis.emplace_back(reduceDescriptor.m_vAxis[i]); - - // Calculate new shape based on the axes. - const TensorShape& reducedShape = ComputeReductionTensorShape(layerInfoToModify, - reduceAxis, - reduceDescriptor.m_KeepDims); - layerInfoToModify.SetShape(reducedShape); - - // Create a vector for the single axis to be assigned to the descriptor. - // Update axis if keepDims is set reduce layers correctly. - std::vector singleAxis(1, reduceDescriptor.m_vAxis[i] - recalulateAxis); - - // Create a descriptor and assign single axis. 
- ReduceDescriptor newReduceDescriptor = baseLayer->GetParameters(); - newReduceDescriptor.m_vAxis.assign(singleAxis.begin(), singleAxis.end()); - - // Add new layer to graph. - std::string layerName = "reduce_layer_" + std::to_string(i); - Layer* replacementLayer = optimizationViews.GetGraph().AddLayer(newReduceDescriptor, - layerName.c_str()); - - // Connect previous layer with new layer. - // The first and last layer will be connected when the subgraph is replaced. - if (!layers.empty()) - { - layers[i - 1]->GetOutputSlot(0).Connect(replacementLayer->GetInputSlot(0)); - } - - // Set updated tensorInfo for new layer. - replacementLayer->GetOutputSlot(0).SetTensorInfo(layerInfoToModify); - - if (!reduceDescriptor.m_KeepDims) - { - recalulateAxis++; - } - - layers.emplace_back(replacementLayer); - } - - // Check if the TensorInfo from the last layer equals the inferred output from the original layer. - ARMNN_ASSERT(baseLayerInfo == layers.back()->GetOutputSlot().GetTensorInfo()); - - std::list replacementLayers(layers.begin(), layers.end()); - - // Substitute new chained subgraph for original reduce layer. - SubgraphView substitutionSubgraph(baseLayer); - SubgraphView replacementSubgraph(CreateInputsFrom({replacementLayers.front()}), - CreateOutputsFrom({replacementLayers.back()}), - std::move(replacementLayers)); - - optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph}); - - } -} - } // namespace armnn diff --git a/src/backends/aclCommon/ArmComputeUtils.hpp b/src/backends/aclCommon/ArmComputeUtils.hpp index 5bc5abcb05..d9efab288f 100644 --- a/src/backends/aclCommon/ArmComputeUtils.hpp +++ b/src/backends/aclCommon/ArmComputeUtils.hpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -268,58 +267,4 @@ inline arm_compute::ReductionOperation ConvertReductionOperationToAcl(const Redu } } -/// Function to compute the output tensor shape based on the axes and if keepDims is set. -inline const TensorShape ComputeReductionTensorShape(const armnn::TensorInfo& input, - const std::vector& vAxis, - const bool keepDims) -{ - unsigned int rank = input.GetNumDimensions(); - unsigned int outputRank = 0; - - // Calculate output dimension - if (keepDims) - { - outputRank = rank; - } - else if (vAxis.empty()) - { - outputRank = 1; - } - else if (vAxis.size() > input.GetNumDimensions()) - { - throw LayerValidationException("ReduceLayer: Dimensions to reduce can not be bigger than input dimensions"); - } - else - { - outputRank = input.GetNumDimensions() - armnn::numeric_cast(vAxis.size()); - if (outputRank == 0) - { - outputRank = 1; - } - } - - std::vector dimSizes(outputRank, 1); - if (!vAxis.empty()) - { - // Skip the dimension that has been reduced unless keepDims is true. 
- unsigned int outputIndex = 0; - for (unsigned int i = 0; i < input.GetNumDimensions(); ++i) - { - if (std::find(vAxis.begin(), vAxis.end(), i) == vAxis.end()) - { - dimSizes[outputIndex] = armnn::numeric_cast(input.GetShape()[i]); - ++outputIndex; - } - else if (keepDims) - { - dimSizes[outputIndex] = 1; - ++outputIndex; - } - } - } - - const TensorShape inferredShape = TensorShape(outputRank, dimSizes.data()); - return inferredShape; -} - } // namespace armnn diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index 92a06aa8e1..f97cb4bba8 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -29,7 +29,6 @@ #include "workloads/ClDivisionWorkload.hpp" #include "workloads/ClFullyConnectedWorkload.hpp" #include "workloads/ClMultiplicationWorkload.hpp" -#include "workloads/ClReduceWorkload.hpp" #include "workloads/ClSubtractionWorkload.hpp" #include @@ -189,8 +188,7 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication - || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division - || base.GetType() == LayerType::Reduce) + || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division) && (base.GetAdditionalInformation() == nullptr)) { for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output) @@ -414,26 +412,6 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, } } } - - // Separate check for Reduce as we aren't fusing with activation layer - if (base.GetType() == LayerType::Reduce) - { - ReduceLayer* baseLayer = PolymorphicDowncast(&base); - - // Get params from base layer - ReduceDescriptor reduceDescriptor = baseLayer->GetParameters(); - - arm_compute::Status status = ClReduceWorkloadValidate( - baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), - baseLayer->GetOutputSlot(0).GetTensorInfo(), - reduceDescriptor); - - if (status) - { - ChainReduceLayers(optimizationViews, baseLayer, reduceDescriptor); - untouched.erase(baseLayer->GetGuid()); - } - } } } } diff --git a/src/backends/cl/workloads/ClReduceWorkload.cpp b/src/backends/cl/workloads/ClReduceWorkload.cpp index 0ad6259cc2..6f594ff7a9 100644 --- a/src/backends/cl/workloads/ClReduceWorkload.cpp +++ b/src/backends/cl/workloads/ClReduceWorkload.cpp @@ -20,52 +20,23 @@ arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input, const ReduceDescriptor& desc) { const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, + "ClReduceWorkload: Reduction is supported only on 1 axis."); + } arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(), input.GetNumDimensions(), desc.m_vAxis); - // As ACL only support one axis, validate the layer for each axis if more than one is present. 
-    if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
-    {
-        arm_compute::Status status;
-
-        for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)
-        {
-            TensorInfo inputToModify = input;
-            std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i]);
-            // Calculate the output shape using the input shape for a single axis.
-            // Currently the output TensorInfo inferred will be reduced upon multiple axis
-            // which will fail validation as only one axis is supported.
-            const TensorShape& reducedShape = ComputeReductionTensorShape(inputToModify, singleAxis, desc.m_KeepDims);
-            inputToModify.SetShape(reducedShape);
-
-            const arm_compute::TensorInfo aclOutputInfoModified =
-                armcomputetensorutils::BuildArmComputeTensorInfo(inputToModify);
-
-            status = arm_compute::CLReductionOperation::validate(&aclInputInfo,
-                                                                 &aclOutputInfoModified,
-                                                                 static_cast<unsigned int>(coords[i]),
-                                                                 ConvertReductionOperationToAcl(desc),
-                                                                 desc.m_KeepDims);
-            if (!status)
-            {
-                break;
-            }
-        }
-        return status;
-    }
-    else
-    {
-        const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
-
-        return arm_compute::CLReductionOperation::validate(&aclInputInfo,
-                                                           &aclOutputInfo,
-                                                           static_cast<unsigned int>(coords[0]),
-                                                           ConvertReductionOperationToAcl(desc),
-                                                           desc.m_KeepDims);
-    }
+    return arm_compute::CLReductionOperation::validate(&aclInputInfo,
+                                                       &aclOutputInfo,
+                                                       static_cast<unsigned int>(coords[0]),
+                                                       ConvertReductionOperationToAcl(desc),
+                                                       desc.m_KeepDims);
 }
 
 ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 6d5eab0ddf..a1299fb458 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -29,7 +29,6 @@
 #include "workloads/NeonDivisionWorkload.hpp"
 #include "workloads/NeonFullyConnectedWorkload.hpp"
 #include "workloads/NeonMultiplicationWorkload.hpp"
-#include "workloads/NeonReduceWorkload.hpp"
 #include "workloads/NeonSubtractionWorkload.hpp"
 
 #include 
@@ -165,8 +164,7 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
         if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
              || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
              || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
-             || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division
-             || base.GetType() == LayerType::Reduce)
+             || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
             && (base.GetAdditionalInformation() == nullptr))
         {
             for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
@@ -391,26 +389,6 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
                     }
                 }
             }
-
-            // Separate check for Reduce as we aren't fusing with activation layer
-            if (base.GetType() == LayerType::Reduce)
-            {
-                ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
-
-                // Get params from base layer
-                ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();
-
-                arm_compute::Status status = NeonReduceWorkloadValidate(
-                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
-                    baseLayer->GetOutputSlot(0).GetTensorInfo(),
-                    reduceDescriptor);
-
-                if (status)
-                {
-                    ChainReduceLayers<ReduceLayer>(optimizationViews, baseLayer, reduceDescriptor);
-                    untouched.erase(baseLayer->GetGuid());
-                }
-            }
         }
     }
 }
diff --git a/src/backends/neon/workloads/NeonReduceWorkload.cpp b/src/backends/neon/workloads/NeonReduceWorkload.cpp
index 6125f3609d..0e1b46a3a1 100644
--- a/src/backends/neon/workloads/NeonReduceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonReduceWorkload.cpp
@@ -21,52 +21,22 @@ arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input,
                                                const ReduceDescriptor& desc)
 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+    if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
+    {
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
+                                   "NeonReduceWorkload: Reduction is supported only on 1 axis.");
+    }
 
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
                                                                           input.GetNumDimensions(),
                                                                           desc.m_vAxis);
 
-    // As ACL only support one axis, validate the layer for each axis if more than one is present.
-    if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
-    {
-        arm_compute::Status status;
-
-        for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)
-        {
-            TensorInfo inputToModify = input;
-            std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i]);
-
-            // Calculate the output shape using the input shape for a single axis.
-            // Currently the output TensorInfo inferred will be reduced upon multiple axis
-            // which will fail validation as only one axis is supported.
-            const TensorShape& reducedShape = ComputeReductionTensorShape(inputToModify, singleAxis, desc.m_KeepDims);
-            inputToModify.SetShape(reducedShape);
-
-            const arm_compute::TensorInfo aclOutputInfoModified =
-                armcomputetensorutils::BuildArmComputeTensorInfo(inputToModify);
-
-            status = arm_compute::NEReductionOperation::validate(&aclInputInfo,
-                                                                 &aclOutputInfoModified,
-                                                                 static_cast<unsigned int>(coords[i]),
-                                                                 ConvertReductionOperationToAcl(desc),
-                                                                 desc.m_KeepDims);
-            if (!status)
-            {
-                break;
-            }
-        }
-        return status;
-    }
-    else
-    {
-        const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
-
-        return arm_compute::NEReductionOperation::validate(&aclInputInfo,
-                                                           &aclOutputInfo,
-                                                           static_cast<unsigned int>(coords[0]),
-                                                           ConvertReductionOperationToAcl(desc),
-                                                           desc.m_KeepDims);
-    }
+    return arm_compute::NEReductionOperation::validate(&aclInputInfo,
+                                                       &aclOutputInfo,
+                                                       static_cast<unsigned int>(coords[0]),
+                                                       ConvertReductionOperationToAcl(desc),
+                                                       desc.m_KeepDims);
 }
 
 NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
@@ -80,7 +50,6 @@ NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor,
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(),
                                                                           info.m_InputTensorInfos[0].GetNumDimensions(),
                                                                           m_Data.m_Parameters.m_vAxis);
-
     m_Layer.configure(&input, &output, static_cast<unsigned int>(coords[0]),
-- 
cgit v1.2.1
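
Note on the reverted optimisation: the helpers removed above (ComputeReductionTensorShape and ChainReduceLayers) inferred the reduced output shape per axis and rewrote a multi-axis Reduce into a chain of single-axis Reduce layers, renumbering each axis as earlier dimensions disappear when KeepDims is false. The standalone C++ sketch below illustrates that idea only; it is not ArmNN code, and the names ReducedShape and ChainSingleAxisReductions are illustrative. The shapes in main() come from the deleted ReduceSumWithTwoAxes tests (input {1,3,2,4}, axes {1,2}).

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Shape inference for a reduction: keep unreduced dimensions, replace reduced
// ones with 1 when keepDims is set, drop them otherwise.
std::vector<uint32_t> ReducedShape(const std::vector<uint32_t>& inputShape,
                                   const std::vector<uint32_t>& axes,
                                   bool keepDims)
{
    std::vector<uint32_t> out;
    for (uint32_t i = 0; i < inputShape.size(); ++i)
    {
        bool reduced = std::find(axes.begin(), axes.end(), i) != axes.end();
        if (!reduced)
        {
            out.push_back(inputShape[i]);
        }
        else if (keepDims)
        {
            out.push_back(1);
        }
    }
    if (out.empty())
    {
        out.push_back(1); // reducing every axis leaves a single-element shape
    }
    return out;
}

// Decompose a multi-axis reduction into single-axis steps, adjusting each axis
// index as earlier dimensions are removed when keepDims is false.
// Returns (adjusted axis, shape after that step) for each step; assumes the
// axes are sorted in ascending order.
std::vector<std::pair<uint32_t, std::vector<uint32_t>>>
ChainSingleAxisReductions(std::vector<uint32_t> shape,
                          const std::vector<uint32_t>& axes,
                          bool keepDims)
{
    std::vector<std::pair<uint32_t, std::vector<uint32_t>>> steps;
    uint32_t removedSoFar = 0;
    for (uint32_t axis : axes)
    {
        uint32_t adjustedAxis = axis - removedSoFar;
        shape = ReducedShape(shape, {adjustedAxis}, keepDims);
        steps.emplace_back(adjustedAxis, shape);
        if (!keepDims)
        {
            ++removedSoFar;
        }
    }
    return steps;
}

int main()
{
    // Shapes from the deleted tests: {1,3,2,4} reduced over axes {1,2}.
    assert((ReducedShape({1, 3, 2, 4}, {1, 2}, true)  == std::vector<uint32_t>{1, 1, 1, 4}));
    assert((ReducedShape({1, 3, 2, 4}, {1, 2}, false) == std::vector<uint32_t>{1, 4}));

    // Chained single-axis reductions reach the same final shape; the second
    // reduction targets axis 1, because original axis 2 shifts down once axis 1 is gone.
    auto steps = ChainSingleAxisReductions({1, 3, 2, 4}, {1, 2}, false);
    assert(steps.back().first == 1);
    assert((steps.back().second == std::vector<uint32_t>{1, 4}));
    return 0;
}
```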
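With the revert applied, ClReduceWorkloadValidate and NeonReduceWorkloadValidate return a RUNTIME_ERROR status whenever more than one axis is requested, so the CL and NEON backends no longer claim multi-axis Reduce layers at all. The reverted approach instead kept the multi-axis layer supported by rewriting the graph into one single-axis Reduce layer per axis (as sketched above), so that ACL's single-axis CLReductionOperation/NEReductionOperation could execute each step.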