aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorGunes Bayir <gunes.bayir@arm.com>2023-09-28 10:30:18 +0100
committerGunes Bayir <gunes.bayir@arm.com>2023-10-02 16:07:22 +0000
commitc2a51bd2cc7c4148d9444e7377af44b2f6c264ba (patch)
treee8f66188d7e048a3f61d660c236ef66b33a0bf35 /arm_compute
parenta396da19ee6e5c36ae07c11e4f16a6787e9bc143 (diff)
downloadComputeLibrary-c2a51bd2cc7c4148d9444e7377af44b2f6c264ba.tar.gz
Optimize CL and Neon Winograd tests
Several test optimizations have been introduced into Winograd tests for Gpu and Cpu backends. The testing strategy has been detailed as a comment header in the test design files. In summary - Very large shapes in the nightly are made smaller - If the underlying kernel is the same for different data types, we only need to stress some key aspects of the kernels (e.g. read/write lengths in case of fp32/fp16). - In case the underlying kernel is the same (OpenCL), Fp16 is tested on a subset of the shapes - In Cpu, there is no need to test every combination for both NCHW and NHWC as we just permute the inputs and use NHWC kernels anyways - All activations does not need to be tested for each and every shape Resolves: COMPMID-6464 Change-Id: Ie25fded85c65b9c7386dc21b23f9b695b1e77b07 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10393 Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h9
1 files changed, 5 insertions, 4 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index 7f4e354362..6caa2aeb59 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H
-#define ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
#include "arm_compute/core/Types.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
@@ -77,7 +77,8 @@ public:
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
- * Currently only 3x3 and 5x5 kernels are supported.
+ * Supported kernel sizes: (height, width) -> 3x3, 1x3, 3x1, 5x5, 1x5, 5x1 for Fp32
+ * -> 3x3 for Fp16
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
@@ -117,4 +118,4 @@ private:
std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H