about | summary | refs | log | tree | commit | diff
path: root/arm_compute/runtime/CL/functions/CLReductionOperation.h
diff options
context:
space:
mode:
author: Giorgio Arena <giorgio.arena@arm.com> 2021-04-28 16:11:51 +0100
committer: Giorgio Arena <giorgio.arena@arm.com> 2021-04-29 17:06:28 +0000
commit: 3ecf9fefa6f6299a0736599f150d4791cc8345d9 (patch)
tree: 749d73c8496d152600f528e442b4e4f11a81621e /arm_compute/runtime/CL/functions/CLReductionOperation.h
parent: 8ed3ac11352d1ebb2c69787432fa5893997f1c50 (diff)
download: ComputeLibrary-3ecf9fefa6f6299a0736599f150d4791cc8345d9.tar.gz
Remove OpenCL padding: CLReductionOperationKernel
Change the parallel implementation across the X: now every thread computes one row. Add missing test for MEAN_SUM. Make reduction on any axis != 0 work with num_channels > 1. Resolve COMPMID-3917. Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>; Change-Id: Ib0f99540104e3c253bcd1ea637833db533f5e76e; Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5522; Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>; Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>; Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>; Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLReductionOperation.h')
-rw-r--r-- arm_compute/runtime/CL/functions/CLReductionOperation.h 18
1 files changed, 7 insertions, 11 deletions
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index 3fbcee6c21..58164fdcb3 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,7 +36,6 @@ namespace arm_compute
{
// Forward declarations
class CLCompileContext;
-class CLFillBorderKernel;
class CLReductionOperationKernel;
class ICLTensor;
@@ -99,15 +98,12 @@ public:
private:
ICLTensor *configure_intermediate_result_vector(ICLTensor *input, ICLTensor *output);
- MemoryGroup _memory_group;
- std::vector<CLTensor> _results_vector;
- std::vector<std::unique_ptr<CLReductionOperationKernel>> _reduction_kernels_vector;
- std::vector<std::unique_ptr<CLFillBorderKernel>> _border_handlers_vector;
- CLReshapeLayer _reshape;
- unsigned int _num_of_stages;
- unsigned int _reduction_axis;
- bool _is_serial;
- bool _is_reshape_required;
+ MemoryGroup _memory_group;
+ CLTensor _unreshaped_output;
+ std::unique_ptr<CLReductionOperationKernel> _reduction_kernel;
+ CLReshapeLayer _reshape;
+ unsigned int _reduction_axis;
+ bool _is_reshape_required;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLREDUCTIONOPERATION_H */ \ No newline at end of file