aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2020-03-10 15:33:57 +0000
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2020-03-10 18:00:14 +0000
commit: ce3a7b27f80960e88415bb6cabbb75de2239cea8 (patch)
tree: e7d6021996a62632c08f6cce81f73467754530e1 /src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp
parent: a26e166829f4d4c48864b1b7243e4e267373d0fd (diff)
download: ComputeLibrary-ce3a7b27f80960e88415bb6cabbb75de2239cea8.tar.gz
COMPMID-3259: Fix scalar register allocation
The AArch64 ABI reserves X18 as the platform register, so it must not be used as a general-purpose scratch register. Replace all references to X18 with a different register which doesn't have a special purpose.
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: Ia9e059d44c5edda216bea169d0418bb7a8c4311b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2863
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Nikhil Raj Arm <nikhil.raj@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp')
-rw-r--r-- src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp 42
1 files changed, 21 insertions, 21 deletions
diff --git a/src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp b/src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp
index 908fc8292a..e4aad76d97 100644
--- a/src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp
@@ -44,9 +44,9 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
__asm__ __volatile__(
"ldr q0, [%[pcoeffs]]\n"
"add x25, %[inptr0], %[input_row_stride]\n"
- "add x18, %[input_col_stride1], %[input_col_stride1]\n"
+ "add x9, %[input_col_stride1], %[input_col_stride1]\n"
"add x16, x25, %[input_row_stride]\n"
- "add x19, x18, %[input_col_stride1]\n"
+ "add x19, x9, %[input_col_stride1]\n"
"add x26, x16, %[input_row_stride]\n"
"add x20, x19, %[input_col_stride1]\n"
"add x17, x26, %[input_row_stride]\n"
@@ -65,7 +65,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"blt 2f\n"
"1:\n"
"ldr q8, [%[inptr0], x20]\n"
- "ldr q2, [%[inptr0], x18]\n"
+ "ldr q2, [%[inptr0], x9]\n"
"mov v14.16b, v8.16b\n"
"ldr q9, [%[inptr0]]\n"
"mov v10.16b, v8.16b\n"
@@ -77,7 +77,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"fmls v10.4s, v12.4s, v0.s[2]\n"
"ldr q5, [x16, x20]\n"
"fmls v14.4s, v2.4s, v0.s[3]\n"
- "ldr q20, [x16, x18]\n"
+ "ldr q20, [x16, x9]\n"
"fmla v9.4s, v12.4s, v0.s[2]\n"
"ldr q3, [x16]\n"
"fmls v10.4s, v2.4s, v0.s[2]\n"
@@ -89,7 +89,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"fadd v10.4s, v10.4s, v4.4s\n"
"ldr q17, [x17, x20]\n"
"fmls v7.4s, v12.4s, v0.s[1]\n"
- "ldr q15, [x17, x18]\n"
+ "ldr q15, [x17, x9]\n"
"fsub v9.4s, v9.4s, v4.4s\n"
"ldr q19, [x17]\n"
"mov v8.16b, v8.16b\n"
@@ -180,7 +180,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"mov v25.16b, v19.16b\n"
"ldr q11, [x25, x20]\n"
"mov v10.16b, v11.16b\n"
- "ldr q23, [x25, x18]\n"
+ "ldr q23, [x25, x9]\n"
"mov v9.16b, v11.16b\n"
"ldr q7, [x25]\n"
"fmla v10.4s, v7.4s, v0.s[2]\n"
@@ -192,7 +192,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"fmls v10.4s, v23.4s, v0.s[3]\n"
"ldr q30, [x26, x20]\n"
"fmls v9.4s, v21.4s, v0.s[2]\n"
- "ldr q29, [x26, x18]\n"
+ "ldr q29, [x26, x9]\n"
"fmla v7.4s, v21.4s, v0.s[2]\n"
"ldr q22, [x26]\n"
"fmls v8.4s, v21.4s, v0.s[1]\n"
@@ -360,7 +360,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"add x14, x14, #16\n"
"ldr q2, [x27, x20]\n"
"mov v4.16b, v2.16b\n"
- "ldr q17, [x27, x18]\n"
+ "ldr q17, [x27, x9]\n"
"mov v12.16b, v2.16b\n"
"ldr q18, [x27]\n"
"fmla v4.4s, v18.4s, v0.s[2]\n"
@@ -420,7 +420,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"blt 3f\n"
"ldr d8, [%[inptr0], x20]\n"
"mov v14.16b, v8.16b\n"
- "ldr d2, [%[inptr0], x18]\n"
+ "ldr d2, [%[inptr0], x9]\n"
"mov v10.16b, v8.16b\n"
"ldr d9, [%[inptr0]]\n"
"fmla v14.4s, v9.4s, v0.s[2]\n"
@@ -432,7 +432,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"fmls v14.4s, v2.4s, v0.s[3]\n"
"ldr d5, [x16, x20]\n"
"fmls v10.4s, v12.4s, v0.s[2]\n"
- "ldr d20, [x16, x18]\n"
+ "ldr d20, [x16, x9]\n"
"fmla v9.4s, v12.4s, v0.s[2]\n"
"ldr d3, [x16]\n"
"fmls v7.4s, v12.4s, v0.s[1]\n"
@@ -444,7 +444,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"fsub v7.4s, v7.4s, v2.4s\n"
"ldr d17, [x17, x20]\n"
"fadd v10.4s, v10.4s, v4.4s\n"
- "ldr d15, [x17, x18]\n"
+ "ldr d15, [x17, x9]\n"
"fsub v9.4s, v9.4s, v4.4s\n"
"ldr d19, [x17]\n"
"fmla v7.4s, v4.4s, v0.s[1]\n"
@@ -534,7 +534,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"mov v25.16b, v19.16b\n"
"ldr d11, [x25, x20]\n"
"mov v10.16b, v11.16b\n"
- "ldr d23, [x25, x18]\n"
+ "ldr d23, [x25, x9]\n"
"mov v9.16b, v11.16b\n"
"ldr d7, [x25]\n"
"fmla v10.4s, v7.4s, v0.s[2]\n"
@@ -546,7 +546,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"fmls v10.4s, v23.4s, v0.s[3]\n"
"ldr d30, [x26, x20]\n"
"fmls v9.4s, v21.4s, v0.s[2]\n"
- "ldr d29, [x26, x18]\n"
+ "ldr d29, [x26, x9]\n"
"fmla v7.4s, v21.4s, v0.s[2]\n"
"ldr d22, [x26]\n"
"fmls v8.4s, v21.4s, v0.s[1]\n"
@@ -714,7 +714,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"add x14, x14, #8\n"
"ldr d2, [x27, x20]\n"
"mov v4.16b, v2.16b\n"
- "ldr d17, [x27, x18]\n"
+ "ldr d17, [x27, x9]\n"
"mov v12.16b, v2.16b\n"
"ldr d18, [x27]\n"
"fmla v4.4s, v18.4s, v0.s[2]\n"
@@ -771,7 +771,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"cbz %w[n_channels], 4f\n"
"ldr s8, [%[inptr0], x20]\n"
"mov v14.16b, v8.16b\n"
- "ldr s2, [%[inptr0], x18]\n"
+ "ldr s2, [%[inptr0], x9]\n"
"mov v10.16b, v8.16b\n"
"ldr s9, [%[inptr0]]\n"
"fmla v14.4s, v9.4s, v0.s[2]\n"
@@ -783,7 +783,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"fmls v14.4s, v2.4s, v0.s[3]\n"
"ldr s5, [x16, x20]\n"
"fmls v10.4s, v12.4s, v0.s[2]\n"
- "ldr s20, [x16, x18]\n"
+ "ldr s20, [x16, x9]\n"
"fmla v9.4s, v12.4s, v0.s[2]\n"
"ldr s3, [x16]\n"
"fmls v7.4s, v12.4s, v0.s[1]\n"
@@ -795,7 +795,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"fsub v7.4s, v7.4s, v2.4s\n"
"ldr s17, [x17, x20]\n"
"fadd v10.4s, v10.4s, v4.4s\n"
- "ldr s15, [x17, x18]\n"
+ "ldr s15, [x17, x9]\n"
"fsub v9.4s, v9.4s, v4.4s\n"
"ldr s19, [x17]\n"
"fmla v7.4s, v4.4s, v0.s[1]\n"
@@ -885,7 +885,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"mov v25.16b, v19.16b\n"
"ldr s11, [x25, x20]\n"
"mov v10.16b, v11.16b\n"
- "ldr s23, [x25, x18]\n"
+ "ldr s23, [x25, x9]\n"
"mov v9.16b, v11.16b\n"
"ldr s7, [x25]\n"
"fmla v10.4s, v7.4s, v0.s[2]\n"
@@ -897,7 +897,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"fmls v10.4s, v23.4s, v0.s[3]\n"
"ldr s30, [x26, x20]\n"
"fmls v9.4s, v21.4s, v0.s[2]\n"
- "ldr s29, [x26, x18]\n"
+ "ldr s29, [x26, x9]\n"
"fmla v7.4s, v21.4s, v0.s[2]\n"
"ldr s22, [x26]\n"
"fmls v8.4s, v21.4s, v0.s[1]\n"
@@ -1065,7 +1065,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
"add x14, x14, #4\n"
"ldr s2, [x27, x20]\n"
"mov v4.16b, v2.16b\n"
- "ldr s17, [x27, x18]\n"
+ "ldr s17, [x27, x9]\n"
"mov v12.16b, v2.16b\n"
"ldr s18, [x27]\n"
"fmla v4.4s, v18.4s, v0.s[2]\n"
@@ -1129,7 +1129,7 @@ void InputTransform<6, 6, float, float, WinogradRoots::Integers>::transform_tile
: "cc", "v0", "v1", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
"v18", "v19", "v2", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
"v27", "v28", "v29", "v3", "v30", "v31", "v4", "v5", "v6", "v7", "v8",
- "v9", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19",
+ "v9", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x9", "x19",
"x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "memory"
);
}