11/*
2- * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3- * (C) 2020 Vladimir Sadovnikov <[email protected] > 2+ * Copyright (C) 2025 Linux Studio Plugins Project <https://lsp-plug.in/>
3+ * (C) 2025 Vladimir Sadovnikov <[email protected] > 44 *
55 * This file is part of lsp-dsp-lib
66 * Created on: 31 мар. 2020 г.
114114 __ASM_EMIT(" movhlps %" x0 " , %" x2) /* xmm2 = B+D B+D */ \
115115 __ASM_EMIT(" addps %" x2 " , %" x0) /* xmm0 = A+C+B+D */
116116
117- /* Get cosine of angle between two 3D vectors, clamped to the range [m0, m1]
118- * Input:
119- * x0 = vector1 [dx dy dz ? ], x1 = vector2 [dx dy dz ? ]
120- * x2, x3, x4 = scratch registers (x1, x2, x3 and x4 are all overwritten)
121- * m0 = -1 (lower clamp bound)
122- * m1 = +1 (upper clamp bound)
123- * The division is skipped when the product of the vector lengths is not greater than 0.
124- * Output:
125- * x0 = (vector1 . vector2) / (|vector1| * |vector2|) in lane 0 [ S0 ? ? ? ]
126- */
127- #define CALC_COSINE2V (x0, x1, x2, x3, x4, m0, m1 ) \
128- __ASM_EMIT (" movaps %" x0 " , %" x2) /* xmm2 = dx0 dy0 dz0 ? */ \
129- __ASM_EMIT(" mulps %" x1 " , %" x0) /* xmm0 = dx0*dx1 dy0*dy1 dz0*dz1 ? */ \
130- __ASM_EMIT(" mulps %" x2 " , %" x2) /* xmm2 = dx0*dx0 dy0*dy0 dz0*dz0 ? */ \
131- __ASM_EMIT(" mulps %" x1 " , %" x1) /* xmm1 = dx1*dx1 dy1*dy1 dz1*dz1 ? */ \
132- __ASM_EMIT(" movhlps %" x2 " , %" x4) /* xmm4 = dz0*dz0 (lane 0) */ \
133- __ASM_EMIT(" movhlps %" x1 " , %" x3) /* xmm3 = dz1*dz1 (lane 0) */ \
134- __ASM_EMIT(" addss %" x4 " , %" x2) /* xmm2 = dx0*dx0+dz0*dz0 dy0*dy0 dz0*dz0 ? */ \
135- __ASM_EMIT(" addss %" x3 " , %" x1) /* xmm1 = dx1*dx1+dz1*dz1 dy1*dy1 dz1*dz1 ? */ \
136- __ASM_EMIT(" unpcklps %" x2 " , %" x2) /* xmm2 = dx0*dx0+dz0*dz0 dx0*dx0+dz0*dz0 dy0*dy0 dy0*dy0 */ \
137- __ASM_EMIT(" unpcklps %" x1 " , %" x1) /* xmm1 = dx1*dx1+dz1*dz1 dx1*dx1+dz1*dz1 dy1*dy1 dy1*dy1 */ \
138- __ASM_EMIT(" movhlps %" x2 " , %" x4) /* xmm4 = dy0*dy0 (lane 0) */ \
139- __ASM_EMIT(" movhlps %" x1 " , %" x3) /* xmm3 = dy1*dy1 (lane 0) */ \
140- __ASM_EMIT(" addss %" x4 " , %" x2) /* xmm2 = dx0*dx0+dz0*dz0+dy0*dy0 (squared length of vector1) */ \
141- __ASM_EMIT(" addss %" x3 " , %" x1) /* xmm1 = dx1*dx1+dz1*dz1+dy1*dy1 (squared length of vector2) */ \
142- __ASM_EMIT(" movhlps %" x0 " , %" x4) /* xmm4 = dz0*dz1 (lane 0) */ \
143- __ASM_EMIT(" sqrtss %" x2 " , %" x2) /* xmm2 = sqrtf(dx0*dx0+dz0*dz0+dy0*dy0) */ \
144- __ASM_EMIT(" addss %" x4 " , %" x0) /* xmm0 = dz0*dz1+dx0*dx1 dy0*dy1 dz0*dz1 ? */ \
145- __ASM_EMIT(" sqrtss %" x1 " , %" x1) /* xmm1 = sqrtf(dx1*dx1+dz1*dz1+dy1*dy1) */ \
146- __ASM_EMIT(" unpcklps %" x0 " , %" x0) /* xmm0 = dz0*dz1+dx0*dx1 dz0*dz1+dx0*dx1 dy0*dy1 dy0*dy1 */ \
147- __ASM_EMIT(" mulss %" x1 " , %" x2) /* xmm2 = w = product of both vector lengths */ \
148- __ASM_EMIT(" movhlps %" x0 " , %" x4) /* xmm4 = dy0*dy1 (lane 0) */ \
149- __ASM_EMIT(" xorps %" x1 " , %" x1) /* xmm1 = 0 */ \
150- __ASM_EMIT(" addss %" x4 " , %" x0) /* xmm0 = dz0*dz1+dx0*dx1+dy0*dy1 (dot product) */ \
151- __ASM_EMIT(" ucomiss %" x1 " , %" x2) /* compare w with 0 */ \
152- __ASM_EMIT(" jbe 1000000f" ) /* w <= 0 (or unordered compare): skip the division */ \
153- __ASM_EMIT(" divss %" x2 " , %" x0) /* xmm0 = (dz0*dz1+dx0*dx1+dy0*dy1)/w */ \
154- __ASM_EMIT(" 1000000:" ) /* clamp xmm0 to [m0, m1] */ \
155- __ASM_EMIT(" ucomiss %" m0 " , %" x0) /* compare xmm0 with m0 */ \
156- __ASM_EMIT(" jae 1000001f" ) /* xmm0 >= m0: lower bound holds, go check the upper bound */ \
157- __ASM_EMIT(" movss %" m0 " , %" x0) /* otherwise xmm0 = m0 */ \
158- __ASM_EMIT(" jmp 1000002f" ) /* done */ \
159- __ASM_EMIT(" 1000001:" ) /* upper bound check */ \
160- __ASM_EMIT(" ucomiss %" m1 " , %" x0) /* compare xmm0 with m1 */ \
161- __ASM_EMIT(" jbe 1000002f" ) /* xmm0 <= m1: already within range */ \
162- __ASM_EMIT(" movss %" m1 " , %" x0) /* otherwise xmm0 = m1 */ \
163- __ASM_EMIT(" jmp 1000002f" ) /* target is the label that immediately follows */ \
164- __ASM_EMIT(" 1000002:" ) /* end of macro, result is in lane 0 of xmm0 */ \
165-
166-
167117/* 3x vector multiplication
168118 * Input:
169119 * x0 = vector1 [dx dy dz ? ]
@@ -1485,48 +1435,6 @@ namespace lsp
14851435 return x0;
14861436 }
14871437
1488- float calc_angle3d_v2 (const vector3d_t *v1, const vector3d_t *v2) /* returns the value left in x0 by CALC_COSINE2V for *v1 and *v2 */
1489- {
1490- float x0, x1, x2, x3, x4; /* all five are bound to SSE registers by the "x" output constraints below */
1491-
1492- ARCH_X86_ASM
1493- (
1494- /* Unaligned 16-byte loads of both vectors; the fourth lane is loaded as well */
1495- __ASM_EMIT (" movups (%[v1]), %[x0]" ) /* xmm0 = dx0 dy0 dz0 dw0 */
1496- __ASM_EMIT (" movups (%[v2]), %[x1]" ) /* xmm1 = dx1 dy1 dz1 dw1 */
1497- CALC_COSINE2V (" [x0]" , " [x1]" , " [x2]" , " [x3]" , " [x4]" , " [M_ONE]" , " [ONE]" ) /* m0 = M_ONE operand, m1 = ONE operand */
1498- : [x0] " =&x" (x0), [x1] " =&x" (x1), [x2] " =&x" (x2), [x3] " =&x" (x3),
1499- [x4] " =&x" (x4)
1500- : [v1] " r" (v1), [v2] " r" (v2),
1501- [ONE] " m" (ONE),
1502- [M_ONE] " m" (X_MINUS_ONE)
1503- : " cc" , " memory"
1504- );
1505-
1506- return x0; /* no arccos is applied in this function: the value is whatever the asm block left in x0 */
1507- }
1508-
1509- float calc_angle3d_vv (const vector3d_t *v) /* same computation as CALC_COSINE2V on two 16-byte groups read from v */
1510- {
1511- float x0, x1, x2, x3, x4; /* all five are bound to SSE registers by the "x" output constraints below */
1512-
1513- ARCH_X86_ASM
1514- (
1515- /* Unaligned 16-byte loads at offsets 0x00 and 0x10 from v; presumably v[0] and v[1] - TODO confirm sizeof(vector3d_t) == 16 */
1516- __ASM_EMIT (" movups 0x00(%[v]), %[x0]" ) /* xmm0 = dx0 dy0 dz0 dw0 */
1517- __ASM_EMIT (" movups 0x10(%[v]), %[x1]" ) /* xmm1 = dx1 dy1 dz1 dw1 */
1518- CALC_COSINE2V (" [x0]" , " [x1]" , " [x2]" , " [x3]" , " [x4]" , " [M_ONE]" , " [ONE]" ) /* m0 = M_ONE operand, m1 = ONE operand */
1519- : [x0] " =&x" (x0), [x1] " =&x" (x1), [x2] " =&x" (x2), [x3] " =&x" (x3),
1520- [x4] " =&x" (x4)
1521- : [v] " r" (v),
1522- [ONE] " m" (ONE),
1523- [M_ONE] " m" (X_MINUS_ONE)
1524- : " cc" , " memory"
1525- );
1526-
1527- return x0; /* no arccos is applied in this function: the value is whatever the asm block left in x0 */
1528- }
1529-
15301438 void calc_normal3d_v2 (vector3d_t *n, const vector3d_t *v1, const vector3d_t *v2)
15311439 {
15321440 float x0, x1, x2, x3;
@@ -3009,8 +2917,8 @@ namespace lsp
30092917 #undef STR_SPLIT_1P
30102918 #undef STR_SPLIT_2P
30112919 }
3012- }
3013- }
2920+ } /* namespace sse */
2921+ } /* namespace lsp */
30142922
30152923#undef SCALAR_MUL
30162924#undef MAT3_TRANSPOSE
@@ -3023,6 +2931,5 @@ namespace lsp
30232931#undef VECTOR_MUL3
30242932#undef VECTOR_MUL
30252933#undef VECTOR_XCHG
3026- #undef CALC_COSINE2V
30272934
30282935#endif /* PRIVATE_DSP_ARCH_X86_SSE_3DMATH_H_ */
0 commit comments