@@ -33,14 +33,14 @@ namespace lsp
3333 IF_ARCH_AARCH64 (
3434 static const float msmatrix_const[] __lsp_aligned16 =
3535 {
36- LSP_DSP_VEC8 (0 .5f )
36+ LSP_DSP_VEC4 (0 .5f )
3737 };
3838 );
3939
4040 void lr_to_ms (float *m, float *s, const float *l, const float *r, size_t count)
4141 {
4242 ARCH_AARCH64_ASM (
43- __ASM_EMIT (" ldp q24, q25, [%[HALF]]" ) // v24 = 0.5, v25 = 0.5
43+ __ASM_EMIT (" ldr q24, [%[HALF]]" ) // v24 = 0.5
4444 // x16 blocks
4545 __ASM_EMIT (" subs %[count], %[count], #16" )
4646 __ASM_EMIT (" b.lo 2f" )
@@ -58,13 +58,13 @@ namespace lsp
5858 __ASM_EMIT (" fsub v22.4s, v2.4s, v6.4s" )
5959 __ASM_EMIT (" fsub v23.4s, v3.4s, v7.4s" )
6060 __ASM_EMIT (" fmul v0.4s, v16.4s, v24.4s" ) // v0 = (l + r)/2
61- __ASM_EMIT (" fmul v1.4s, v17.4s, v25 .4s" )
61+ __ASM_EMIT (" fmul v1.4s, v17.4s, v24 .4s" )
6262 __ASM_EMIT (" fmul v2.4s, v18.4s, v24.4s" )
63- __ASM_EMIT (" fmul v3.4s, v19.4s, v25 .4s" )
63+ __ASM_EMIT (" fmul v3.4s, v19.4s, v24 .4s" )
6464 __ASM_EMIT (" fmul v4.4s, v20.4s, v24.4s" ) // v4 = (l - r)/2
65- __ASM_EMIT (" fmul v5.4s, v21.4s, v25 .4s" )
65+ __ASM_EMIT (" fmul v5.4s, v21.4s, v24 .4s" )
6666 __ASM_EMIT (" fmul v6.4s, v22.4s, v24.4s" )
67- __ASM_EMIT (" fmul v7.4s, v23.4s, v25 .4s" )
67+ __ASM_EMIT (" fmul v7.4s, v23.4s, v24 .4s" )
6868 __ASM_EMIT (" stp q0, q1, [%[m], #0x00]" )
6969 __ASM_EMIT (" stp q2, q3, [%[m], #0x20]" )
7070 __ASM_EMIT (" stp q4, q5, [%[s], #0x00]" )
@@ -86,9 +86,9 @@ namespace lsp
8686 __ASM_EMIT (" fsub v20.4s, v0.4s, v4.4s" ) // v20 = l - r
8787 __ASM_EMIT (" fsub v21.4s, v1.4s, v5.4s" )
8888 __ASM_EMIT (" fmul v0.4s, v16.4s, v24.4s" ) // v0 = (l + r)/2
89- __ASM_EMIT (" fmul v1.4s, v17.4s, v25 .4s" )
89+ __ASM_EMIT (" fmul v1.4s, v17.4s, v24 .4s" )
9090 __ASM_EMIT (" fmul v4.4s, v20.4s, v24.4s" ) // v4 = (l - r)/2
91- __ASM_EMIT (" fmul v5.4s, v21.4s, v25 .4s" )
91+ __ASM_EMIT (" fmul v5.4s, v21.4s, v24 .4s" )
9292 __ASM_EMIT (" stp q0, q1, [%[m], #0x00]" )
9393 __ASM_EMIT (" stp q4, q5, [%[s], #0x00]" )
9494 __ASM_EMIT (" sub %[count], %[count], #8" )
@@ -142,7 +142,7 @@ namespace lsp
142142 " v4" , " v5" , " v6" , " v7" ,
143143 " v16" , " v17" , " v18" , " v19" ,
144144 " v20" , " v21" , " v22" , " v23" ,
145- " v24" , " v25 "
145+ " v24"
146146 );
147147 }
148148
@@ -238,7 +238,7 @@ namespace lsp
238238 }
239239
240240 #define LR_TO_PART (OP ) \
241- __ASM_EMIT (" ldp q24, q25, [%[HALF]]" ) /* v24 = 0.5, v25 = 0.5 */ \
241+ __ASM_EMIT (" ldr q24, [%[HALF]]" ) /* v24 = 0.5 */ \
242242 /* x16 blocks */ \
243243 __ASM_EMIT (" subs %[count], %[count], #16" ) \
244244 __ASM_EMIT (" b.lo 2f" ) \
@@ -252,9 +252,9 @@ namespace lsp
252252 __ASM_EMIT (OP " v18.4s, v2.4s, v6.4s" ) \
253253 __ASM_EMIT (OP " v19.4s, v3.4s, v7.4s" ) \
254254 __ASM_EMIT (" fmul v0.4s, v16.4s, v24.4s" ) /* v0 = (l op r)/2 */ \
255- __ASM_EMIT (" fmul v1.4s, v17.4s, v25 .4s" ) \
255+ __ASM_EMIT (" fmul v1.4s, v17.4s, v24 .4s" ) \
256256 __ASM_EMIT (" fmul v2.4s, v18.4s, v24.4s" ) \
257- __ASM_EMIT (" fmul v3.4s, v19.4s, v25 .4s" ) \
257+ __ASM_EMIT (" fmul v3.4s, v19.4s, v24 .4s" ) \
258258 __ASM_EMIT (" stp q0, q1, [%[dst], #0x00]" ) \
259259 __ASM_EMIT (" stp q2, q3, [%[dst], #0x20]" ) \
260260 __ASM_EMIT (" subs %[count], %[count], #16" ) \
@@ -271,7 +271,7 @@ namespace lsp
271271 __ASM_EMIT (OP " v16.4s, v0.4s, v4.4s" ) /* v16 = l op r */ \
272272 __ASM_EMIT (OP " v17.4s, v1.4s, v5.4s" ) \
273273 __ASM_EMIT (" fmul v0.4s, v16.4s, v24.4s" ) /* v0 = (l op r)/2 */ \
274- __ASM_EMIT (" fmul v1.4s, v17.4s, v25 .4s" ) \
274+ __ASM_EMIT (" fmul v1.4s, v17.4s, v24 .4s" ) \
275275 __ASM_EMIT (" stp q0, q1, [%[dst], #0x00]" ) \
276276 __ASM_EMIT (" sub %[count], %[count], #8" ) \
277277 __ASM_EMIT (" add %[l], %[l], #0x20" ) \
@@ -318,7 +318,7 @@ namespace lsp
318318 " v0" , " v1" , " v2" , " v3" ,
319319 " v4" , " v5" , " v6" , " v7" ,
320320 " v16" , " v17" , " v18" , " v19" ,
321- " v24" , " v25 "
321+ " v24"
322322 );
323323 }
324324
@@ -333,7 +333,7 @@ namespace lsp
333333 " v0" , " v1" , " v2" , " v3" ,
334334 " v4" , " v5" , " v6" , " v7" ,
335335 " v16" , " v17" , " v18" , " v19" ,
336- " v24" , " v25 "
336+ " v24"
337337 );
338338 }
339339
0 commit comments