aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/convolution/winograd/output_transforms
diff options
context:
space:
mode:
author: Michael Tyler <michael.tyler@arm.com> 2024-06-04 15:47:37 +0100
committer: Michael Tyler <michael.tyler@arm.com> 2024-06-25 09:10:13 +0000
commit: fc94f4d23abd4bc427b701f54ad85282e9ec7872 (patch)
tree: 5e2980599256e2b2f4374e5beb61596fc95c9d5a /src/core/NEON/kernels/convolution/winograd/output_transforms
parent: c2237ec4094c7824f8f7e61bc89504d01c5b59ff (diff)
download: ComputeLibrary-fc94f4d23abd4bc427b701f54ad85282e9ec7872.tar.gz
Update CPU kernels and add mixed sign GEMM support
- Add support for mixed sign quantized convolution.
- Add support for mixed sign dequantized GEMM.
- Add SME FP16 GEMV kernel.
- Change SME vector length function to use RDSVL instead of static variable.
- Add GEMM dilation support internally (not exposed yet).
- Remove unused "get_default_activation_values" functions.
- Add SVE fixed format interleaved BF16 DOT kernel.
- Updates and optimizations to assembly kernels.

Resolves COMPMID-6926

Change-Id: I227f502502611d4cc4111c89e30c53ce94079544
Signed-off-by: Michael Tyler <michael.tyler@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11570
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/convolution/winograd/output_transforms')
-rw-r--r-- src/core/NEON/kernels/convolution/winograd/output_transforms/a64_fp16_4x4_3x3.cpp | 10
-rw-r--r-- src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x2_1x7.cpp | 8
-rw-r--r-- src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x4_1x5.cpp | 8
-rw-r--r-- src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x6_1x3.cpp | 8
-rw-r--r-- src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_3x3.cpp | 10
-rw-r--r-- src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_5x5.cpp | 10
-rw-r--r-- src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_4x4_3x3.cpp | 10
-rw-r--r-- src/core/NEON/kernels/convolution/winograd/output_transforms/sme_fp32_mopa_4x4_3x3.cpp | 12
8 files changed, 38 insertions, 38 deletions
diff --git a/src/core/NEON/kernels/convolution/winograd/output_transforms/a64_fp16_4x4_3x3.cpp b/src/core/NEON/kernels/convolution/winograd/output_transforms/a64_fp16_4x4_3x3.cpp
index 295005a2ee..4218b754b4 100644
--- a/src/core/NEON/kernels/convolution/winograd/output_transforms/a64_fp16_4x4_3x3.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/output_transforms/a64_fp16_4x4_3x3.cpp
@@ -34,13 +34,13 @@ namespace output_transform {
void a64_fp16_4x4_3x3(
unsigned int n_channels,
const __fp16* inptr,
- size_t matrix_stride,
+ const size_t matrix_stride,
const __fp16* bptr,
__fp16* const output,
- size_t output_row_stride,
- size_t output_col_stride,
- __fp16 output_min,
- __fp16 output_max
+ const size_t output_row_stride,
+ const size_t output_col_stride,
+ const __fp16 output_min,
+ const __fp16 output_max
)
{
constexpr int output_tile_rows = 4, output_tile_cols = 4;
diff --git a/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x2_1x7.cpp b/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x2_1x7.cpp
index 8c6cf9725e..c52df266a5 100644
--- a/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x2_1x7.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x2_1x7.cpp
@@ -33,13 +33,13 @@ namespace output_transform {
void arm_fp32_1x2_1x7(
unsigned int n_channels,
const float* inptr,
- size_t matrix_stride,
+ const size_t matrix_stride,
const float* bptr,
float *outptr,
size_t, // No need to stride across rows
- size_t output_col_stride,
- float output_min,
- float output_max
+ const size_t output_col_stride,
+ const float output_min,
+ const float output_max
)
{
constexpr auto inner_tile_cols = 8u, output_tile_cols = 2u;
diff --git a/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x4_1x5.cpp b/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x4_1x5.cpp
index ac05f23221..7d771abeee 100644
--- a/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x4_1x5.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x4_1x5.cpp
@@ -33,13 +33,13 @@ namespace output_transform {
void arm_fp32_1x4_1x5(
unsigned int n_channels,
const float* inptr,
- size_t matrix_stride,
+ const size_t matrix_stride,
const float* bptr,
float *outptr,
size_t, // No need to stride across rows
- size_t output_col_stride,
- float output_min,
- float output_max
+ const size_t output_col_stride,
+ const float output_min,
+ const float output_max
)
{
constexpr auto inner_tile_cols = 8u, output_tile_cols = 4u;
diff --git a/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x6_1x3.cpp b/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x6_1x3.cpp
index 154dc6fe1a..513908190a 100644
--- a/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x6_1x3.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x6_1x3.cpp
@@ -34,13 +34,13 @@ namespace output_transform {
void arm_fp32_1x6_1x3(
unsigned int n_channels,
const float* inptr,
- size_t matrix_stride,
+ const size_t matrix_stride,
const float* bptr,
float *outptr,
size_t, // No need to stride across rows
- size_t output_col_stride,
- float output_min,
- float output_max
+ const size_t output_col_stride,
+ const float output_min,
+ const float output_max
)
{
constexpr unsigned int inner_tile_cols = 8, output_tile_cols = 6;
diff --git a/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_3x3.cpp b/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_3x3.cpp
index 28f042bcbf..4c7376bef8 100644
--- a/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_3x3.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_3x3.cpp
@@ -33,13 +33,13 @@ namespace output_transform {
void arm_fp32_2x2_3x3(
unsigned int n_channels,
const float* inptr,
- size_t matrix_stride,
+ const size_t matrix_stride,
const float* bptr,
float *outptr,
- size_t output_row_stride,
- size_t output_col_stride,
- float output_min,
- float output_max
+ const size_t output_row_stride,
+ const size_t output_col_stride,
+ const float output_min,
+ const float output_max
)
{
constexpr auto output_tile_rows = 2u, output_tile_cols = 2u;
diff --git a/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_5x5.cpp b/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_5x5.cpp
index 8e5ba74ac3..d5649b8a18 100644
--- a/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_5x5.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_5x5.cpp
@@ -33,13 +33,13 @@ namespace output_transform {
void arm_fp32_2x2_5x5(
unsigned int n_channels,
const float* inptr,
- size_t matrix_stride,
+ const size_t matrix_stride,
const float* bptr,
float *outptr,
- size_t output_row_stride,
- size_t output_col_stride,
- float output_min,
- float output_max
+ const size_t output_row_stride,
+ const size_t output_col_stride,
+ const float output_min,
+ const float output_max
)
{
constexpr auto output_tile_rows = 2u, output_tile_cols = 2u;
diff --git a/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_4x4_3x3.cpp b/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_4x4_3x3.cpp
index 72c43019fa..6a32f67b5d 100644
--- a/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_4x4_3x3.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_4x4_3x3.cpp
@@ -33,13 +33,13 @@ namespace output_transform {
void arm_fp32_4x4_3x3(
unsigned int n_channels,
const float* inptr,
- size_t matrix_stride,
+ const size_t matrix_stride,
const float* bptr,
float *outptr,
- size_t output_row_stride,
- size_t output_col_stride,
- float output_min,
- float output_max
+ const size_t output_row_stride,
+ const size_t output_col_stride,
+ const float output_min,
+ const float output_max
)
{
constexpr auto output_tile_rows = 4u, output_tile_cols = 4u;
diff --git a/src/core/NEON/kernels/convolution/winograd/output_transforms/sme_fp32_mopa_4x4_3x3.cpp b/src/core/NEON/kernels/convolution/winograd/output_transforms/sme_fp32_mopa_4x4_3x3.cpp
index 043914d590..8d2b00c1fb 100644
--- a/src/core/NEON/kernels/convolution/winograd/output_transforms/sme_fp32_mopa_4x4_3x3.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/output_transforms/sme_fp32_mopa_4x4_3x3.cpp
@@ -31,15 +31,15 @@ namespace winograd {
namespace output_transform {
void sme_fp32_mopa_4x4_3x3(
- unsigned int n_channels,
+ const unsigned int n_channels,
const float* inptr,
- size_t matrix_stride,
+ const size_t matrix_stride,
const float* bptr,
float* const output,
- size_t output_row_stride,
- size_t output_col_stride,
- float output_min,
- float output_max
+ const size_t output_row_stride,
+ const size_t output_col_stride,
+ const float output_min,
+ const float output_max
)
{
// The below assembler uses the Kronecker product and the "vec trick" to