author    Jef <jef@targetspot.com>  2024-09-24 08:54:57 -0400
committer Jef <jef@targetspot.com>  2024-09-24 08:54:57 -0400
commit    20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch)
tree      12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/ns-eel2/asm-nseel-x86-gcc.c
parent    537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff)
download  winamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz

Initial community commit

Diffstat (limited to 'Src/ns-eel2/asm-nseel-x86-gcc.c')
 -rw-r--r--   Src/ns-eel2/asm-nseel-x86-gcc.c   2153
 1 file changed, 2153 insertions(+), 0 deletions(-)
diff --git a/Src/ns-eel2/asm-nseel-x86-gcc.c b/Src/ns-eel2/asm-nseel-x86-gcc.c
new file mode 100644
index 00000000..5c3d747e
--- /dev/null
+++ b/Src/ns-eel2/asm-nseel-x86-gcc.c
@@ -0,0 +1,2153 @@
+/* note: only EEL_F_SIZE=8 is now supported (no float EEL_F's) */
+
+#ifndef AMD64ABI
+#define X64_EXTRA_STACK_SPACE 32 // win32 requires allocating space for 4 parameters at 8 bytes each, even though we pass via register
+#endif
+
+void nseel_asm_1pdd(void)
+{
+ __asm__(
+
+ FUNCTION_MARKER
+
+ "movl $0xfefefefe, %edi\n"
+#ifdef TARGET_X64
+ "fstpl (%rsi)\n"
+ "movq (%rsi), %xmm0\n"
+ #ifdef AMD64ABI
+ "movl %rsi, %r15\n"
+ "call *%edi\n"
+ "movl %r15, %rsi\n"
+ #else
+ "subl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "call *%edi\n"
+ "addl X64_EXTRA_STACK_SPACE, %rsp\n"
+ #endif
+ "movq xmm0, (%rsi)\n"
+ "fldl (%rsi)\n"
+#else
+ "subl $16, %esp\n"
+ "fstpl (%esp)\n"
+ "call *%edi\n"
+ "addl $16, %esp\n"
+#endif
+
+ FUNCTION_MARKER
+
+ );
+}
+void nseel_asm_1pdd_end(void){}
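+/*
+  Rough C-level sketch of what this stub bridges to (illustrative only; the
+  EEL2 compiler presumably copies the code between the FUNCTION_MARKERs and
+  patches the 0xfefefefe placeholder with the address of the real callee, so
+  the names and signature below are assumptions for illustration):
+
+    typedef double (*eel_fn_1pdd)(double);    // one double in, one double out
+
+    static double call_1pdd(eel_fn_1pdd fn, double x)
+    {
+      return fn(x);   // the asm spills st(0) (or xmm0 on x64) to memory to make
+    }                 // this C-ABI call, then reloads the result onto the FPU stack
+*/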
+
+void nseel_asm_2pdd(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+
+ "movl $0xfefefefe, %edi\n"
+#ifdef TARGET_X64
+ "fstpl 8(%rsi)\n"
+ "fstpl (%rsi)\n"
+ "movq 8(%rsi), %xmm1\n"
+ "movq (%rsi), %xmm0\n"
+ #ifdef AMD64ABI
+ "movl %rsi, %r15\n"
+ "call *%edi\n"
+ "movl %r15, %rsi\n"
+ #else
+ "subl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "call *%edi\n"
+ "addl X64_EXTRA_STACK_SPACE, %rsp\n"
+ #endif
+ "movq xmm0, (%rsi)\n"
+ "fldl (%rsi)\n"
+#else
+ "subl $16, %esp\n"
+ "fstpl 8(%esp)\n"
+ "fstpl (%esp)\n"
+ "call *%edi\n"
+ "addl $16, %esp\n"
+#endif
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_2pdd_end(void){}
+
+void nseel_asm_2pdds(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+
+ "movl $0xfefefefe, %eax\n"
+#ifdef TARGET_X64
+ "fstpl (%rsi)\n"
+ "movq (%rdi), %xmm0\n"
+ "movq (%rsi), %xmm1\n"
+ #ifdef AMD64ABI
+ "movl %rsi, %r15\n"
+ "movl %rdi, %r14\n"
+ "call *%eax\n"
+ "movl %r14, %rdi\n" /* restore thrashed rdi */
+ "movl %r15, %rsi\n"
+ "movl %r14, %rax\n" /* set return value */
+ "movq xmm0, (%r14)\n"
+ #else
+ "subl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "call *%eax\n"
+ "movq xmm0, (%edi)\n"
+ "movl %edi, %eax\n" /* set return value */
+ "addl X64_EXTRA_STACK_SPACE, %rsp\n"
+ #endif
+#else
+ "subl $8, %esp\n"
+ "fstpl (%esp)\n"
+ "pushl 4(%edi)\n" /* push parameter */
+ "pushl (%edi)\n" /* push the rest of the parameter */
+ "call *%eax\n"
+ "addl $16, %esp\n"
+ "fstpl (%edi)\n" /* store result */
+ "movl %edi, %eax\n" /* set return value */
+#endif
+
+ // denormal-fix result (this is only currently used for pow_op, so we want this!)
+ "movl 4(%edi), %edx\n"
+ "addl $0x00100000, %edx\n"
+ "andl $0x7FF00000, %edx\n"
+ "cmpl $0x00200000, %edx\n"
+ "jg 0f\n"
+ "subl %edx, %edx\n"
+#ifdef TARGET_X64
+ "movll %rdx, (%rdi)\n"
+#else
+ "movl %edx, (%edi)\n"
+ "movl %edx, 4(%edi)\n"
+#endif
+ "0:\n"
+
+ FUNCTION_MARKER
+
+ );
+}
+void nseel_asm_2pdds_end(void){}
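+/*
+  The denormal-fix block above, as a C sketch (assumes little-endian IEEE-754
+  doubles; names are illustrative):
+
+    static void flush_tiny_to_zero(double *p)
+    {
+      unsigned int hi = ((unsigned int *)p)[1]; // sign + 11-bit exponent + top of mantissa
+      hi += 0x00100000;                         // exponent += 1 (0x7ff wraps around to 0)
+      hi &= 0x7FF00000;                         // keep only the exponent field
+      if (hi <= 0x00200000)                     // exponent was 0, 1 or 0x7ff...
+        *p = 0.0;                               // ...so flush denormal/tiny/inf/NaN to 0
+    }
+*/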
+
+
+
+//---------------------------------------------------------------------------------------------------------------
+
+
+// do nothing, eh
+void nseel_asm_exec2(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ ""
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_exec2_end(void) { }
+
+
+
+void nseel_asm_invsqrt(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "movl $0x5f3759df, %edx\n"
+ "fsts (%esi)\n"
+#ifdef TARGET_X64
+ "movl 0xfefefefe, %rax\n"
+ "fmul" EEL_F_SUFFIX " (%rax)\n"
+ "movsxl (%esi), %rcx\n"
+#else
+ "fmul" EEL_F_SUFFIX " (0xfefefefe)\n"
+ "movl (%esi), %ecx\n"
+#endif
+ "sarl $1, %ecx\n"
+ "subl %ecx, %edx\n"
+ "movl %edx, (%esi)\n"
+ "fmuls (%esi)\n"
+ "fmuls (%esi)\n"
+#ifdef TARGET_X64
+ "movl 0xfefefefe, %rax\n"
+ "fadd" EEL_F_SUFFIX " (%rax)\n"
+#else
+ "fadd" EEL_F_SUFFIX " (0xfefefefe)\n"
+#endif
+ "fmuls (%esi)\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_invsqrt_end(void) {}
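+/*
+  The stub above is the classic bit-trick inverse square root; the two
+  0xfefefefe slots are presumably patched with the addresses of the -0.5 and
+  1.5 constants used by the Newton step.  Equivalent C sketch (illustrative):
+
+    static double inv_sqrt(double v)
+    {
+      float x = (float)v;
+      unsigned int i;
+      memcpy(&i, &x, 4);
+      i = 0x5f3759df - (i >> 1);            // seed guess from the float bit pattern
+      memcpy(&x, &i, 4);
+      return x * (1.5 - 0.5 * v * x * x);   // one Newton-Raphson refinement step
+    }
+*/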
+
+
+void nseel_asm_dbg_getstackptr(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef __clang__
+ "ffree %st(0)\n"
+#else
+ "fstpl %st(0)\n"
+#endif
+ "movl %esp, (%esi)\n"
+ "fildl (%esi)\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_dbg_getstackptr_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_sin(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fsin\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_sin_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_cos(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fcos\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_cos_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_tan(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fptan\n"
+ "fstp %st(0)\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_tan_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_sqr(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fmul %st(0), %st(0)\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_sqr_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_sqrt(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fabs\n"
+ "fsqrt\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_sqrt_end(void) {}
+
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_log(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fldln2\n"
+ "fxch\n"
+ "fyl2x\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_log_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_log10(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fldlg2\n"
+ "fxch\n"
+ "fyl2x\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_log10_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_abs(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fabs\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_abs_end(void) {}
+
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_assign(void)
+{
+#ifdef TARGET_X64
+
+ __asm__(
+ FUNCTION_MARKER
+ "movll (%rax), %rdx\n"
+ "movll %rdx, %rcx\n"
+ "shrl $32, %rdx\n"
+ "addl $0x00100000, %edx\n"
+ "andl $0x7FF00000, %edx\n"
+ "cmpl $0x00200000, %edx\n"
+ "movll %rdi, %rax\n"
+ "jg 0f\n"
+ "subl %ecx, %ecx\n"
+ "0:\n"
+ "movll %rcx, (%edi)\n"
+
+ FUNCTION_MARKER
+ );
+
+#else
+
+ __asm__(
+ FUNCTION_MARKER
+ "movl (%eax), %ecx\n"
+ "movl 4(%eax), %edx\n"
+ "movl %edx, %eax\n"
+ "addl $0x00100000, %eax\n" // if exponent is zero, make exponent 0x7ff, if 7ff, make 7fe
+ "andl $0x7ff00000, %eax\n"
+ "cmpl $0x00200000, %eax\n"
+ "jg 0f\n"
+ "subl %ecx, %ecx\n"
+ "subl %edx, %edx\n"
+ "0:\n"
+ "movl %edi, %eax\n"
+ "movl %ecx, (%edi)\n"
+ "movl %edx, 4(%edi)\n"
+
+ FUNCTION_MARKER
+ );
+
+#endif
+}
+void nseel_asm_assign_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_assign_fromfp(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fstpl (%edi)\n"
+ "movl 4(%edi), %edx\n"
+ "addl $0x00100000, %edx\n"
+ "andl $0x7FF00000, %edx\n"
+ "cmpl $0x00200000, %edx\n"
+ "movl %edi, %eax\n"
+ "jg 0f\n"
+ "subl %edx, %edx\n"
+#ifdef TARGET_X64
+ "movll %rdx, (%rdi)\n"
+#else
+ "movl %edx, (%edi)\n"
+ "movl %edx, 4(%edi)\n"
+#endif
+ "0:\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_assign_fromfp_end(void) {}
+
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_assign_fast_fromfp(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "movl %edi, %eax\n"
+ "fstpl (%edi)\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_assign_fast_fromfp_end(void) {}
+
+
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_assign_fast(void)
+{
+#ifdef TARGET_X64
+
+ __asm__(
+ FUNCTION_MARKER
+ "movll (%rax), %rdx\n"
+ "movll %rdx, (%edi)\n"
+ "movll %rdi, %rax\n"
+ FUNCTION_MARKER
+ );
+
+#else
+
+ __asm__(
+ FUNCTION_MARKER
+ "movl (%eax), %ecx\n"
+ "movl %ecx, (%edi)\n"
+ "movl 4(%eax), %ecx\n"
+
+ "movl %edi, %eax\n"
+ "movl %ecx, 4(%edi)\n"
+ FUNCTION_MARKER
+ );
+
+#endif
+}
+void nseel_asm_assign_fast_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_add(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef __clang__
+ "faddp %st(1)\n"
+#else
+ "fadd\n"
+#endif
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_add_end(void) {}
+
+void nseel_asm_add_op(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fadd" EEL_F_SUFFIX " (%edi)\n"
+ "movl %edi, %eax\n"
+ "fstp" EEL_F_SUFFIX " (%edi)\n"
+
+ "movl 4(%edi), %edx\n"
+ "addl $0x00100000, %edx\n"
+ "andl $0x7FF00000, %edx\n"
+ "cmpl $0x00200000, %edx\n"
+ "jg 0f\n"
+ "subl %edx, %edx\n"
+#ifdef TARGET_X64
+ "movll %rdx, (%rdi)\n"
+#else
+ "movl %edx, (%edi)\n"
+ "movl %edx, 4(%edi)\n"
+#endif
+ "0:\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_add_op_end(void) {}
+
+void nseel_asm_add_op_fast(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fadd" EEL_F_SUFFIX " (%edi)\n"
+ "movl %edi, %eax\n"
+ "fstp" EEL_F_SUFFIX " (%edi)\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_add_op_fast_end(void) {}
+
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_sub(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef __clang__
+ "fsubrp %st(0), %st(1)\n"
+#else
+ #ifdef __GNUC__
+ #ifdef __INTEL_COMPILER
+ "fsub\n"
+ #else
+ "fsubr\n" // gnuc has fsub/fsubr backwards, ack
+ #endif
+ #else
+ "fsub\n"
+ #endif
+#endif
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_sub_end(void) {}
+
+void nseel_asm_sub_op(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fsubr" EEL_F_SUFFIX " (%edi)\n"
+ "movl %edi, %eax\n"
+ "fstp" EEL_F_SUFFIX " (%edi)\n"
+
+ "movl 4(%edi), %edx\n"
+ "addl $0x00100000, %edx\n"
+ "andl $0x7FF00000, %edx\n"
+ "cmpl $0x00200000, %edx\n"
+ "jg 0f\n"
+ "subl %edx, %edx\n"
+#ifdef TARGET_X64
+ "movll %rdx, (%rdi)\n"
+#else
+ "movl %edx, (%edi)\n"
+ "movl %edx, 4(%edi)\n"
+#endif
+ "0:\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_sub_op_end(void) {}
+
+void nseel_asm_sub_op_fast(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fsubr" EEL_F_SUFFIX " (%edi)\n"
+ "movl %edi, %eax\n"
+ "fstp" EEL_F_SUFFIX " (%edi)\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_sub_op_fast_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_mul(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef __clang__
+ "fmulp %st(0), %st(1)\n"
+#else
+ "fmul\n"
+#endif
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_mul_end(void) {}
+
+void nseel_asm_mul_op(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fmul" EEL_F_SUFFIX " (%edi)\n"
+ "movl %edi, %eax\n"
+ "fstp" EEL_F_SUFFIX " (%edi)\n"
+
+ "movl 4(%edi), %edx\n"
+ "addl $0x00100000, %edx\n"
+ "andl $0x7FF00000, %edx\n"
+ "cmpl $0x00200000, %edx\n"
+ "jg 0f\n"
+ "subl %edx, %edx\n"
+#ifdef TARGET_X64
+ "movll %rdx, (%rdi)\n"
+#else
+ "movl %edx, (%edi)\n"
+ "movl %edx, 4(%edi)\n"
+#endif
+ "0:\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_mul_op_end(void) {}
+
+void nseel_asm_mul_op_fast(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fmul" EEL_F_SUFFIX " (%edi)\n"
+ "movl %edi, %eax\n"
+ "fstp" EEL_F_SUFFIX " (%edi)\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_mul_op_fast_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_div(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef __clang__
+ "fdivrp %st(1)\n"
+#else
+ #ifdef __GNUC__
+ #ifdef __INTEL_COMPILER
+ "fdiv\n"
+ #else
+ "fdivr\n" // gcc inline asm seems to have fdiv/fdivr backwards
+ #endif
+ #else
+ "fdiv\n"
+ #endif
+#endif
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_div_end(void) {}
+
+void nseel_asm_div_op(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fld" EEL_F_SUFFIX " (%edi)\n"
+#ifdef __clang__
+ "fdivp %st(1)\n"
+#else
+ #ifndef __GNUC__
+ "fdivr\n"
+ #else
+ #ifdef __INTEL_COMPILER
+ "fdivp %st(1)\n"
+ #else
+ "fdiv\n"
+ #endif
+ #endif
+#endif
+ "movl %edi, %eax\n"
+ "fstp" EEL_F_SUFFIX " (%edi)\n"
+
+ "movl 4(%edi), %edx\n"
+ "addl $0x00100000, %edx\n"
+ "andl $0x7FF00000, %edx\n"
+ "cmpl $0x00200000, %edx\n"
+ "jg 0f\n"
+ "subl %edx, %edx\n"
+#ifdef TARGET_X64
+ "movll %rdx, (%rdi)\n"
+#else
+ "movl %edx, (%edi)\n"
+ "movl %edx, 4(%edi)\n"
+#endif
+ "0:\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_div_op_end(void) {}
+
+void nseel_asm_div_op_fast(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fld" EEL_F_SUFFIX " (%edi)\n"
+#ifdef __clang__
+ "fdivp %st(1)\n"
+#else
+ #ifndef __GNUC__
+ "fdivr\n"
+ #else
+ #ifdef __INTEL_COMPILER
+ "fdivp %st(1)\n"
+ #else
+ "fdiv\n"
+ #endif
+ #endif
+#endif
+ "movl %edi, %eax\n"
+ "fstp" EEL_F_SUFFIX " (%edi)\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_div_op_fast_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_mod(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fabs\n"
+ "fistpl (%esi)\n"
+ "fabs\n"
+ "fistpl 4(%esi)\n"
+ "xorl %edx, %edx\n"
+ "cmpl $0, (%esi)\n"
+ "je 0f\n" // skip devide, set return to 0
+ "movl 4(%esi), %eax\n"
+ "divl (%esi)\n"
+ "0:\n"
+ "movl %edx, (%esi)\n"
+ "fildl (%esi)\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_mod_end(void) {}
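+/*
+  C-level sketch of the '%' operator above (illustrative): both operands are
+  converted to integers via their absolute values, and a zero divisor yields 0
+  instead of faulting.
+
+    static double eel_mod(double dividend, double divisor)
+    {
+      unsigned int d = (unsigned int)fabs(divisor);
+      if (!d) return 0.0;                               // skip the divide entirely
+      return (double)((unsigned int)fabs(dividend) % d);
+    }
+*/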
+
+void nseel_asm_shl(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fistpl (%esi)\n"
+ "fistpl 4(%esi)\n"
+ "movl (%esi), %ecx\n"
+ "movl 4(%esi), %eax\n"
+ "shll %cl, %eax\n"
+ "movl %eax, (%esi)\n"
+ "fildl (%esi)\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_shl_end(void) {}
+
+void nseel_asm_shr(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fistpl (%esi)\n"
+ "fistpl 4(%esi)\n"
+ "movl (%esi), %ecx\n"
+ "movl 4(%esi), %eax\n"
+ "sarl %cl, %eax\n"
+ "movl %eax, (%esi)\n"
+ "fildl (%esi)\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_shr_end(void) {}
+
+
+void nseel_asm_mod_op(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fld" EEL_F_SUFFIX " (%edi)\n"
+ "fxch\n"
+ "fabs\n"
+ "fistpl (%edi)\n"
+ "fabs\n"
+ "fistpl (%esi)\n"
+ "xorl %edx, %edx\n"
+ "cmpl $0, (%edi)\n"
+ "je 0f\n" // skip devide, set return to 0
+ "movl (%esi), %eax\n"
+ "divl (%edi)\n"
+ "0:\n"
+ "movl %edx, (%edi)\n"
+ "fildl (%edi)\n"
+ "movl %edi, %eax\n"
+ "fstp" EEL_F_SUFFIX " (%edi)\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_mod_op_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_or(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fistpll (%esi)\n"
+ "fistpll 8(%esi)\n"
+#ifdef TARGET_X64
+ "movll 8(%rsi), %rdi\n"
+ "orll %rdi, (%rsi)\n"
+#else
+ "movl 8(%esi), %edi\n"
+ "movl 12(%esi), %ecx\n"
+ "orl %edi, (%esi)\n"
+ "orl %ecx, 4(%esi)\n"
+#endif
+ "fildll (%esi)\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_or_end(void) {}
+
+void nseel_asm_or0(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fistpll (%esi)\n"
+ "fildll (%esi)\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_or0_end(void) {}
+
+void nseel_asm_or_op(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fld" EEL_F_SUFFIX " (%edi)\n"
+ "fxch\n"
+ "fistpll (%edi)\n"
+ "fistpll (%esi)\n"
+#ifdef TARGET_X64
+ "movll (%rsi), %rax\n"
+ "orll %rax, (%rdi)\n"
+#else
+ "movl (%esi), %eax\n"
+ "movl 4(%esi), %ecx\n"
+ "orl %eax, (%edi)\n"
+ "orl %ecx, 4(%edi)\n"
+#endif
+ "fildll (%edi)\n"
+ "movl %edi, %eax\n"
+ "fstp" EEL_F_SUFFIX " (%edi)\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_or_op_end(void) {}
+
+
+void nseel_asm_xor(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fistpll (%esi)\n"
+ "fistpll 8(%esi)\n"
+#ifdef TARGET_X64
+ "movll 8(%rsi), %rdi\n"
+ "xorll %rdi, (%rsi)\n"
+#else
+ "movl 8(%esi), %edi\n"
+ "movl 12(%esi), %ecx\n"
+ "xorl %edi, (%esi)\n"
+ "xorl %ecx, 4(%esi)\n"
+#endif
+ "fildll (%esi)\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_xor_end(void) {}
+
+void nseel_asm_xor_op(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fld" EEL_F_SUFFIX " (%edi)\n"
+ "fxch\n"
+ "fistpll (%edi)\n"
+ "fistpll (%esi)\n"
+#ifdef TARGET_X64
+ "movll (%rsi), %rax\n"
+ "xorll %rax, (%rdi)\n"
+#else
+ "movl (%esi), %eax\n"
+ "movl 4(%esi), %ecx\n"
+ "xorl %eax, (%edi)\n"
+ "xorl %ecx, 4(%edi)\n"
+#endif
+ "fildll (%edi)\n"
+ "movl %edi, %eax\n"
+ "fstp" EEL_F_SUFFIX " (%edi)\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_xor_op_end(void) {}
+
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_and(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fistpll (%esi)\n"
+ "fistpll 8(%esi)\n"
+#ifdef TARGET_X64
+ "movll 8(%rsi), %rdi\n"
+ "andll %rdi, (%rsi)\n"
+#else
+ "movl 8(%esi), %edi\n"
+ "movl 12(%esi), %ecx\n"
+ "andl %edi, (%esi)\n"
+ "andl %ecx, 4(%esi)\n"
+#endif
+ "fildll (%esi)\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_and_end(void) {}
+
+void nseel_asm_and_op(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fld" EEL_F_SUFFIX " (%edi)\n"
+ "fxch\n"
+ "fistpll (%edi)\n"
+ "fistpll (%esi)\n"
+#ifdef TARGET_X64
+ "movll (%rsi), %rax\n"
+ "andll %rax, (%rdi)\n"
+#else
+ "movl (%esi), %eax\n"
+ "movl 4(%esi), %ecx\n"
+ "andl %eax, (%edi)\n"
+ "andl %ecx, 4(%edi)\n"
+#endif
+ "fildll (%edi)\n"
+ "movl %edi, %eax\n"
+ "fstp" EEL_F_SUFFIX " (%edi)\n"
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_and_op_end(void) {}
+
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_uplus(void) // this is the same as doing nothing, it seems
+{
+ __asm__(
+ FUNCTION_MARKER
+ ""
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_uplus_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_uminus(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fchs\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_uminus_end(void) {}
+
+
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_sign(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+
+#ifdef TARGET_X64
+
+
+ "fst" EEL_F_SUFFIX " (%rsi)\n"
+ "mov" EEL_F_SUFFIX " (%rsi), %rdx\n"
+ "movll $0x7FFFFFFFFFFFFFFF, %rcx\n"
+ "testll %rcx, %rdx\n"
+ "jz 0f\n" // zero zero, return the value passed directly
+ // calculate sign
+ "incll %rcx\n" // rcx becomes 0x80000...
+ "fstp %st(0)\n"
+ "fld1\n"
+ "testl %rcx, %rdx\n"
+ "jz 0f\n"
+ "fchs\n"
+ "0:\n"
+
+#else
+
+ "fsts (%esi)\n"
+ "movl (%esi), %ecx\n"
+ "movl $0x7FFFFFFF, %edx\n"
+ "testl %edx, %ecx\n"
+ "jz 0f\n" // zero zero, return the value passed directly
+ // calculate sign
+ "incl %edx\n" // edx becomes 0x8000...
+ "fstp %st(0)\n"
+ "fld1\n"
+ "testl %edx, %ecx\n"
+ "jz 0f\n"
+ "fchs\n"
+ "0:\n"
+
+#endif
+ FUNCTION_MARKER
+);
+}
+void nseel_asm_sign_end(void) {}
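+/*
+  sign() above, sketched in C: 0 (or -0) is returned unchanged, anything else
+  becomes +1.0 or -1.0 depending on the sign bit (the 32-bit path inspects the
+  single-precision image of the value, as the asm does with fsts):
+
+    static double eel_sign(double v)
+    {
+      float f = (float)v;
+      unsigned int bits;
+      memcpy(&bits, &f, 4);
+      if (!(bits & 0x7FFFFFFF)) return v;         // magnitude bits all zero: return input
+      return (bits & 0x80000000) ? -1.0 : 1.0;
+    }
+*/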
+
+
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_bnot(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "testl %eax, %eax\n"
+ "setz %al\n"
+ "andl $0xff, %eax\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_bnot_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_fcall(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "movl $0xfefefefe, %edx\n"
+#ifdef TARGET_X64
+ "subl $8, %esp\n"
+ "call *%edx\n"
+ "addl $8, %esp\n"
+#else
+ "subl $12, %esp\n" /* keep stack 16 byte aligned, 4 bytes for return address */
+ "call *%edx\n"
+ "addl $12, %esp\n"
+#endif
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_fcall_end(void) {}
+
+void nseel_asm_band(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "testl %eax, %eax\n"
+ "jz 0f\n"
+
+ "movl $0xfefefefe, %ecx\n"
+#ifdef TARGET_X64
+ "subl $8, %rsp\n"
+#else
+ "subl $12, %esp\n"
+#endif
+ "call *%ecx\n"
+#ifdef TARGET_X64
+ "addl $8, %rsp\n"
+#else
+ "addl $12, %esp\n"
+#endif
+ "0:\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_band_end(void) {}
+
+void nseel_asm_bor(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "testl %eax, %eax\n"
+ "jnz 0f\n"
+
+ "movl $0xfefefefe, %ecx\n"
+#ifdef TARGET_X64
+ "subl $8, %rsp\n"
+#else
+ "subl $12, %esp\n"
+#endif
+ "call *%ecx\n"
+#ifdef TARGET_X64
+ "addl $8, %rsp\n"
+#else
+ "addl $12, %esp\n"
+#endif
+ "0:\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_bor_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_equal(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef __clang__
+ "fsubp %st(1)\n"
+#else
+ "fsub\n"
+#endif
+
+ "fabs\n"
+#ifdef TARGET_X64
+ "fcomp" EEL_F_SUFFIX " -8(%r12)\n" //[g_closefact]
+#else
+ "fcomp" EEL_F_SUFFIX " -8(%ebx)\n" //[g_closefact]
+#endif
+ "fstsw %ax\n"
+ "andl $256, %eax\n" // old behavior: if 256 set, true (NaN means true)
+
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_equal_end(void) {}
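+/*
+  '==' above is a fuzzy compare rather than an exact one: it tests whether
+  |a - b| is below the g_closefact constant kept just below the block-table
+  pointer (-8(%ebx) / -8(%r12)).  Sketch (illustrative):
+
+    static int eel_equal(double a, double b, double g_closefact)
+    {
+      return fabs(a - b) < g_closefact;
+    }
+
+  One difference from plain C: fcomp also sets C0 (the 256 bit) on an unordered
+  compare, so a NaN operand reads as "true" here, as the comment in the asm notes.
+*/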
+//
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_equal_exact(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fcompp\n"
+ "fstsw %ax\n" // for equal 256 and 1024 should be clear, 16384 should be set
+ "andl $17664, %eax\n" // mask C4/C3/C1, bits 8/10/14, 16384|256|1024 -- if equals 16384, then equality
+ "cmp $16384, %eax\n"
+ "je 0f\n"
+ "subl %eax, %eax\n"
+ "0:\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_equal_exact_end(void) {}
+
+void nseel_asm_notequal_exact(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fcompp\n"
+ "fstsw %ax\n" // for equal 256 and 1024 should be clear, 16384 should be set
+ "andl $17664, %eax\n" // mask C4/C3/C1, bits 8/10/14, 16384|256|1024 -- if equals 16384, then equality
+ "cmp $16384, %eax\n"
+ "je 0f\n"
+ "subl %eax, %eax\n"
+ "0:\n"
+ "xorl $16384, %eax\n" // flip the result
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_notequal_exact_end(void) {}
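+/*
+  The exact comparisons above decode the x87 status word left by fcompp.  The
+  constants are the condition-code bits: C0 = 0x0100 (256), C2 = 0x0400 (1024),
+  C3 = 0x4000 (16384).  After fcompp:
+
+    //  st(0) >  st(1)   : C3=0 C2=0 C0=0
+    //  st(0) <  st(1)   : C3=0 C2=0 C0=1
+    //  st(0) == st(1)   : C3=1 C2=0 C0=0
+    //  unordered (NaN)  : C3=1 C2=1 C0=1
+
+  so (status & 17664) == 16384 holds only for true equality; a NaN leaves the
+  other bits set and falls through to the "clear eax" path.
+*/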
+//
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_notequal(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef __clang__
+ "fsubp %st(1)\n"
+#else
+ "fsub\n"
+#endif
+
+ "fabs\n"
+#ifdef TARGET_X64
+ "fcomp" EEL_F_SUFFIX " -8(%r12)\n" //[g_closefact]
+#else
+ "fcomp" EEL_F_SUFFIX " -8(%ebx)\n" //[g_closefact]
+#endif
+ "fstsw %ax\n"
+ "andl $256, %eax\n"
+ "xorl $256, %eax\n" // old behavior: if 256 set, FALSE (NaN makes for false)
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_notequal_end(void) {}
+
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_above(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fcompp\n"
+ "fstsw %ax\n"
+ "andl $1280, %eax\n" // (1024+256) old behavior: NaN would mean 1, preserve that
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_above_end(void) {}
+
+//---------------------------------------------------------------------------------------------------------------
+void nseel_asm_beloweq(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fcompp\n"
+ "fstsw %ax\n"
+ "andl $256, %eax\n" // old behavior: NaN would be 0 (ugh)
+ "xorl $256, %eax\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_beloweq_end(void) {}
+
+
+void nseel_asm_booltofp(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "testl %eax, %eax\n"
+ "jz 0f\n"
+ "fld1\n"
+ "jmp 1f\n"
+ "0:\n"
+ "fldz\n"
+ "1:\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_booltofp_end(void) {}
+
+void nseel_asm_fptobool(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fabs\n"
+#ifdef TARGET_X64
+ "fcomp" EEL_F_SUFFIX " -8(%r12)\n" //[g_closefact]
+#else
+ "fcomp" EEL_F_SUFFIX " -8(%ebx)\n" //[g_closefact]
+#endif
+ "fstsw %ax\n"
+ "andl $256, %eax\n"
+ "xorl $256, %eax\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_fptobool_end(void) {}
+
+void nseel_asm_fptobool_rev(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fabs\n"
+#ifdef TARGET_X64
+ "fcomp" EEL_F_SUFFIX " -8(%r12)\n" //[g_closefact]
+#else
+ "fcomp" EEL_F_SUFFIX " -8(%ebx)\n" //[g_closefact]
+#endif
+ "fstsw %ax\n"
+ "andl $256, %eax\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_fptobool_rev_end(void) {}
+
+void nseel_asm_min(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fld" EEL_F_SUFFIX " (%edi)\n"
+ "fcomp" EEL_F_SUFFIX " (%eax)\n"
+ "movl %eax, %ecx\n"
+ "fstsw %ax\n"
+ "testl $256, %eax\n"
+ "movl %ecx, %eax\n"
+ "jz 0f\n"
+ "movl %edi, %eax\n"
+ "0:\n"
+ FUNCTION_MARKER
+ );
+
+}
+void nseel_asm_min_end(void) {}
+
+void nseel_asm_max(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fld" EEL_F_SUFFIX " (%edi)\n"
+ "fcomp" EEL_F_SUFFIX " (%eax)\n"
+ "movl %eax, %ecx\n"
+ "fstsw %ax\n"
+ "testl $256, %eax\n"
+ "movl %ecx, %eax\n"
+ "jnz 0f\n"
+ "movl %edi, %eax\n"
+ "0:\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_max_end(void) {}
+
+
+
+void nseel_asm_min_fp(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fcom\n"
+ "fstsw %ax\n"
+ "testl $256, %eax\n"
+ "jz 0f\n"
+ "fxch\n"
+ "0:\n"
+ "fstp %st(0)\n"
+ FUNCTION_MARKER
+ );
+
+}
+void nseel_asm_min_fp_end(void) {}
+
+void nseel_asm_max_fp(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+ "fcom\n"
+ "fstsw %ax\n"
+ "testl $256, %eax\n"
+ "jnz 0f\n"
+ "fxch\n"
+ "0:\n"
+ "fstp %st(0)\n"
+ FUNCTION_MARKER
+ );
+}
+void nseel_asm_max_fp_end(void) {}
+
+
+
+// just generic functions left, yay
+
+
+
+
+void _asm_generic3parm(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef TARGET_X64
+
+#ifdef AMD64ABI
+
+ "movl %rsi, %r15\n"
+ "movl %rdi, %rdx\n" // third parameter = parm
+ "movl $0xfefefefe, %rdi\n" // first parameter= context
+
+ "movl %ecx, %rsi\n" // second parameter = parm
+ "movl %rax, %rcx\n" // fourth parameter = parm
+ "movl $0xfefefefe, %rax\n" // call function
+ "call *%rax\n"
+
+ "movl %r15, %rsi\n"
+#else
+ "movl %ecx, %edx\n" // second parameter = parm
+ "movl $0xfefefefe, %ecx\n" // first parameter= context
+ "movl %rdi, %r8\n" // third parameter = parm
+ "movl %rax, %r9\n" // fourth parameter = parm
+ "movl $0xfefefefe, %edi\n" // call function
+ "subl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "call *%edi\n"
+ "addl X64_EXTRA_STACK_SPACE, %rsp\n"
+#endif
+
+#else
+
+ "movl $0xfefefefe, %edx\n"
+ "pushl %eax\n" // push parameter
+ "pushl %edi\n" // push parameter
+ "movl $0xfefefefe, %edi\n"
+ "pushl %ecx\n" // push parameter
+ "pushl %edx\n" // push context pointer
+ "call *%edi\n"
+ "addl $16, %esp\n"
+
+#endif
+ FUNCTION_MARKER
+ );
+}
+void _asm_generic3parm_end(void) {}
+
+
+void _asm_generic3parm_retd(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef TARGET_X64
+#ifdef AMD64ABI
+ "movl %rsi, %r15\n"
+ "movl %rdi, %rdx\n" // third parameter = parm
+ "movl $0xfefefefe, %rdi\n" // first parameter= context
+ "movl %ecx, %rsi\n" // second parameter = parm
+ "movl %rax, %rcx\n" // fourth parameter = parm
+ "movl $0xfefefefe, %rax\n" // call function
+ "call *%rax\n"
+ "movl %r15, %rsi\n"
+ "movq xmm0, (%r15)\n"
+ "fldl (%r15)\n"
+#else
+ "movl %ecx, %edx\n" // second parameter = parm
+ "movl $0xfefefefe, %ecx\n" // first parameter= context
+ "movl %rdi, %r8\n" // third parameter = parm
+ "movl %rax, %r9\n" // fourth parameter = parm
+ "movl $0xfefefefe, %edi\n" // call function
+ "subl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "call *%edi\n"
+ "addl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "movq xmm0, (%rsi)\n"
+ "fldl (%rsi)\n"
+#endif
+#else
+
+ "subl $16, %esp\n"
+ "movl $0xfefefefe, %edx\n"
+ "movl %edi, 8(%esp)\n"
+ "movl $0xfefefefe, %edi\n"
+ "movl %eax, 12(%esp)\n"
+ "movl %ecx, 4(%esp)\n"
+ "movl %edx, (%esp)\n"
+ "call *%edi\n"
+ "addl $16, %esp\n"
+
+#endif
+ FUNCTION_MARKER
+ );
+}
+void _asm_generic3parm_retd_end(void) {}
+
+
+void _asm_generic2parm(void) // this probably needs to be fixed for ppc
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef TARGET_X64
+
+#ifdef AMD64ABI
+ "movl %rsi, %r15\n"
+ "movl %edi, %esi\n" // second parameter = parm
+ "movl $0xfefefefe, %edi\n" // first parameter= context
+ "movl %rax, %rdx\n" // third parameter = parm
+ "movl $0xfefefefe, %rcx\n" // call function
+ "call *%rcx\n"
+ "movl %r15, %rsi\n"
+#else
+ "movl $0xfefefefe, %ecx\n" // first parameter= context
+ "movl %edi, %edx\n" // second parameter = parm
+ "movl %rax, %r8\n" // third parameter = parm
+ "movl $0xfefefefe, %edi\n" // call function
+ "subl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "call *%edi\n"
+ "addl X64_EXTRA_STACK_SPACE, %rsp\n"
+#endif
+#else
+
+ "movl $0xfefefefe, %edx\n"
+ "movl $0xfefefefe, %ecx\n"
+ "subl $4, %esp\n" // keep stack aligned
+ "pushl %eax\n" // push parameter
+ "pushl %edi\n" // push parameter
+ "pushl %edx\n" // push context pointer
+ "call *%ecx\n"
+ "addl $16, %esp\n"
+
+#endif
+ FUNCTION_MARKER
+ );
+}
+void _asm_generic2parm_end(void) {}
+
+
+void _asm_generic2parm_retd(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef TARGET_X64
+#ifdef AMD64ABI
+ "movl %rsi, %r15\n"
+ "movl %rdi, %rsi\n" // second parameter = parm
+ "movl $0xfefefefe, %rdi\n" // first parameter= context
+ "movl $0xfefefefe, %rcx\n" // call function
+ "movl %rax, %rdx\n" // third parameter = parm
+ "call *%rcx\n"
+ "movl %r15, %rsi\n"
+ "movq xmm0, (%r15)\n"
+ "fldl (%r15)\n"
+#else
+ "movl %rdi, %rdx\n" // second parameter = parm
+ "movl $0xfefefefe, %rcx\n" // first parameter= context
+ "movl $0xfefefefe, %rdi\n" // call function
+ "movl %rax, %r8\n" // third parameter = parm
+ "subl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "call *%edi\n"
+ "addl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "movq xmm0, (%rsi)\n"
+ "fldl (%rsi)\n"
+#endif
+#else
+
+ "subl $16, %esp\n"
+ "movl $0xfefefefe, %edx\n"
+ "movl $0xfefefefe, %ecx\n"
+ "movl %edx, (%esp)\n"
+ "movl %edi, 4(%esp)\n"
+ "movl %eax, 8(%esp)\n"
+ "call *%ecx\n"
+ "addl $16, %esp\n"
+
+#endif
+ FUNCTION_MARKER
+ );
+}
+void _asm_generic2parm_retd_end(void) {}
+
+
+
+
+
+void _asm_generic1parm(void)
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef TARGET_X64
+#ifdef AMD64ABI
+ "movl $0xfefefefe, %rdi\n" // first parameter= context
+ "movl %rsi, %r15\n"
+ "movl %eax, %rsi\n" // second parameter = parm
+ "movl $0xfefefefe, %rcx\n" // call function
+ "call *%rcx\n"
+ "movl %r15, %rsi\n"
+#else
+ "movl $0xfefefefe, %ecx\n" // first parameter= context
+ "movl %eax, %edx\n" // second parameter = parm
+ "movl $0xfefefefe, %edi\n" // call function
+ "subl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "call *%edi\n"
+ "addl X64_EXTRA_STACK_SPACE, %rsp\n"
+#endif
+#else
+
+ "movl $0xfefefefe, %edx\n"
+ "subl $8, %esp\n" // keep stack aligned
+ "movl $0xfefefefe, %ecx\n"
+ "pushl %eax\n" // push parameter
+ "pushl %edx\n" // push context pointer
+ "call *%ecx\n"
+ "addl $16, %esp\n"
+
+#endif
+
+ FUNCTION_MARKER
+ );
+}
+void _asm_generic1parm_end(void) {}
+
+
+void _asm_generic1parm_retd(void) // 1 parameter returning double
+{
+ __asm__(
+ FUNCTION_MARKER
+#ifdef TARGET_X64
+#ifdef AMD64ABI
+ "movl $0xfefefefe, %rdi\n" // first parameter = context pointer
+ "movl $0xfefefefe, %rcx\n" // function address
+ "movl %rsi, %r15\n" // save rsi
+ "movl %rax, %rsi\n" // second parameter = parameter
+
+ "call *%rcx\n"
+
+ "movl %r15, %rsi\n"
+ "movq xmm0, (%r15)\n"
+ "fldl (%r15)\n"
+#else
+ "movl $0xfefefefe, %ecx\n" // first parameter= context
+ "movl $0xfefefefe, %edi\n" // call function
+
+ "movl %rax, %rdx\n" // second parameter = parm
+
+ "subl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "call *%edi\n"
+ "addl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "movq xmm0, (%rsi)\n"
+ "fldl (%rsi)\n"
+#endif
+#else
+
+ "movl $0xfefefefe, %edx\n" // context pointer
+ "movl $0xfefefefe, %ecx\n" // func-addr
+ "subl $16, %esp\n"
+ "movl %eax, 4(%esp)\n" // push parameter
+ "movl %edx, (%esp)\n" // push context pointer
+ "call *%ecx\n"
+ "addl $16, %esp\n"
+
+#endif
+ FUNCTION_MARKER
+ );
+}
+void _asm_generic1parm_retd_end(void) {}
+
+
+
+
+
+// this gets its own stub because it's pretty crucial for performance :/
+
+void _asm_megabuf(void)
+{
+ __asm__(
+
+ FUNCTION_MARKER
+
+#ifdef TARGET_X64
+
+
+#ifdef AMD64ABI
+
+ "fadd" EEL_F_SUFFIX " -8(%r12)\n"
+
+ "fistpl (%rsi)\n"
+
+ // check if (%rsi) is in range, and buffer available, otherwise call function
+ "movl (%rsi), %edx\n"
+ "cmpl %1, %rdx\n" //REPLACE=((NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK))
+ "jae 0f\n"
+ "movll %rdx, %rax\n"
+ "shrll %2, %rax\n" //REPLACE=(NSEEL_RAM_ITEMSPERBLOCK_LOG2 - 3/*log2(sizeof(void *))*/ )
+ "andll %3, %rax\n" //REPLACE=((NSEEL_RAM_BLOCKS-1)*8 /*sizeof(void*)*/ )
+ "movll (%r12, %rax), %rax\n"
+ "testl %rax, %rax\n"
+ "jnz 1f\n"
+ "0:\n"
+ "movl $0xfefefefe, %rax\n"
+ "movl %r12, %rdi\n" // set first parm to ctx
+ "movl %rsi, %r15\n" // save rsi
+ "movl %rdx, %esi\n" // esi becomes second parameter (edi is first, context pointer)
+ "call *%rax\n"
+ "movl %r15, %rsi\n" // restore rsi
+ "jmp 2f\n"
+ "1:\n"
+ "andll %4, %rdx\n" //REPLACE=(NSEEL_RAM_ITEMSPERBLOCK-1)
+ "shlll $3, %rdx\n" // 3 is log2(sizeof(EEL_F))
+ "addll %rdx, %rax\n"
+ "2:\n"
+
+#else
+
+ "fadd" EEL_F_SUFFIX " -8(%r12)\n"
+
+ "fistpl (%rsi)\n"
+
+ // check if (%rsi) is in range...
+ "movl (%rsi), %edi\n"
+ "cmpl %1, %edi\n" //REPLACE=((NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK))
+ "jae 0f\n"
+ "movll %rdi, %rax\n"
+ "shrll %2, %rax\n" //REPLACE=(NSEEL_RAM_ITEMSPERBLOCK_LOG2 - 3/*log2(sizeof(void *))*/ )
+ "andll %3, %rax\n" //REPLACE=((NSEEL_RAM_BLOCKS-1)*8 /*sizeof(void*)*/ )
+ "movll (%r12, %rax), %rax\n"
+ "testl %rax, %rax\n"
+ "jnz 1f\n"
+ "0:\n"
+ "movl $0xfefefefe, %rax\n" // function ptr
+ "movl %r12, %rcx\n" // set first parm to ctx
+ "movl %rdi, %rdx\n" // rdx is second parameter (rcx is first)
+ "subl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "call *%rax\n"
+ "addl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "jmp 2f\n"
+ "1:\n"
+ "andll %4, %rdi\n" //REPLACE=(NSEEL_RAM_ITEMSPERBLOCK-1)
+ "shlll $3, %rdi\n" // 3 is log2(sizeof(EEL_F))
+ "addll %rdi, %rax\n"
+ "2:\n"
+#endif
+
+
+ FUNCTION_MARKER
+#else
+ "fadd" EEL_F_SUFFIX " -8(%%ebx)\n"
+ "fistpl (%%esi)\n"
+
+ // check if (%esi) is in range, and buffer available, otherwise call function
+ "movl (%%esi), %%edi\n"
+ "cmpl %0, %%edi\n" //REPLACE=((NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK))
+ "jae 0f\n"
+
+ "movl %%edi, %%eax\n"
+ "shrl %1, %%eax\n" //REPLACE=(NSEEL_RAM_ITEMSPERBLOCK_LOG2 - 2/*log2(sizeof(void *))*/ )
+ "andl %2, %%eax\n" //REPLACE=((NSEEL_RAM_BLOCKS-1)*4 /*sizeof(void*)*/ )
+ "movl (%%ebx, %%eax), %%eax\n"
+ "testl %%eax, %%eax\n"
+ "jnz 1f\n"
+ "0:\n"
+ "subl $8, %%esp\n" // keep stack aligned
+ "movl $0xfefefefe, %%ecx\n"
+ "pushl %%edi\n" // parameter
+ "pushl %%ebx\n" // push context pointer
+ "call *%%ecx\n"
+ "addl $16, %%esp\n"
+ "jmp 2f\n"
+ "1:\n"
+ "andl %3, %%edi\n" //REPLACE=(NSEEL_RAM_ITEMSPERBLOCK-1)
+ "shll $3, %%edi\n" // 3 is log2(sizeof(EEL_F))
+ "addl %%edi, %%eax\n"
+ "2:"
+ FUNCTION_MARKER
+
+ #ifndef _MSC_VER
+ :: "i" (((NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK))),
+ "i" ((NSEEL_RAM_ITEMSPERBLOCK_LOG2 - 2/*log2(sizeof(void *))*/ )),
+ "i" (((NSEEL_RAM_BLOCKS-1)*4 /*sizeof(void*)*/ )),
+ "i" ((NSEEL_RAM_ITEMSPERBLOCK-1 ))
+ #endif
+
+
+
+#endif
+
+ );
+}
+
+void _asm_megabuf_end(void) {}
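+/*
+  What the megabuf stub computes, as a C sketch.  The index is the FP value
+  truncated to an integer (after adding the small constant at -8(%ebx)/-8(%r12)),
+  and the 0xfefefefe placeholder is presumably patched with the address of the
+  slow path that allocates missing blocks (names below are illustrative):
+
+    static EEL_F *megabuf_lookup(EEL_F **blocks, unsigned int idx,
+                                 EEL_F *(*slow_path)(EEL_F **, unsigned int))
+    {
+      if (idx >= NSEEL_RAM_BLOCKS * NSEEL_RAM_ITEMSPERBLOCK)
+        return slow_path(blocks, idx);                            // out of range
+      EEL_F *block = blocks[idx >> NSEEL_RAM_ITEMSPERBLOCK_LOG2]; // which block
+      if (!block)
+        return slow_path(blocks, idx);                            // block not allocated yet
+      return block + (idx & (NSEEL_RAM_ITEMSPERBLOCK - 1));       // slot within the block
+    }
+*/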
+
+
+void _asm_gmegabuf(void)
+{
+ __asm__(
+
+ FUNCTION_MARKER
+
+#ifdef TARGET_X64
+
+
+#ifdef AMD64ABI
+
+ "movl %rsi, %r15\n"
+ "fadd" EEL_F_SUFFIX " -8(%r12)\n"
+ "movl $0xfefefefe, %rdi\n" // first parameter = context pointer
+ "fistpl (%rsi)\n"
+ "movl $0xfefefefe, %edx\n"
+ "movl (%rsi), %esi\n"
+ "call *%rdx\n"
+ "movl %r15, %rsi\n"
+
+#else
+ "fadd" EEL_F_SUFFIX " -8(%r12)\n"
+ "movl $0xfefefefe, %rcx\n" // first parameter = context pointer
+ "fistpl (%rsi)\n"
+ "movl $0xfefefefe, %rdi\n"
+ "movl (%rsi), %edx\n"
+ "subl X64_EXTRA_STACK_SPACE, %rsp\n"
+ "call *%rdi\n"
+ "addl X64_EXTRA_STACK_SPACE, %rsp\n"
+#endif
+
+
+#else
+ "subl $16, %esp\n" // keep stack aligned
+ "movl $0xfefefefe, (%esp)\n"
+ "fadd" EEL_F_SUFFIX " -8(%ebx)\n"
+ "movl $0xfefefefe, %edi\n"
+ "fistpl 4(%esp)\n"
+ "call *%edi\n"
+ "addl $16, %esp\n"
+
+#endif
+
+
+
+ FUNCTION_MARKER
+ );
+}
+
+void _asm_gmegabuf_end(void) {}
+
+void nseel_asm_stack_push(void)
+{
+#ifdef TARGET_X64
+ __asm__(
+ FUNCTION_MARKER
+ "movl $0xfefefefe, %rdi\n"
+ "movll (%rax), %rcx\n"
+ "movll (%rdi), %rax\n"
+ "addll $8, %rax\n"
+ "movl $0xFEFEFEFE, %rdx\n"
+ "andll %rdx, %rax\n"
+ "movl $0xFEFEFEFE, %rdx\n"
+ "orll %rdx, %rax\n"
+ "movll %rcx, (%rax)\n"
+ "movll %rax, (%rdi)\n"
+ FUNCTION_MARKER
+ );
+#else
+
+ __asm__(
+ FUNCTION_MARKER
+ "movl $0xfefefefe, %edi\n"
+
+ "movl (%eax), %ecx\n"
+ "movl 4(%eax), %edx\n"
+
+ "movl (%edi), %eax\n"
+
+ "addl $8, %eax\n"
+ "andl $0xfefefefe, %eax\n"
+ "orl $0xfefefefe, %eax\n"
+
+ "movl %ecx, (%eax)\n"
+ "movl %edx, 4(%eax)\n"
+
+ "movl %eax, (%edi)\n"
+ FUNCTION_MARKER
+ );
+
+#endif
+
+}
+void nseel_asm_stack_push_end(void) {}
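+/*
+  The user stack is a power-of-two-sized ring buffer.  The three 0xfefefefe
+  slots above are presumably patched with (1) the address of the stack-top
+  pointer, (2) a wrap mask and (3) the buffer's base address bits.  Push, as a
+  C sketch (illustrative):
+
+    static void stack_push(EEL_F **stacktop, uintptr_t wrapmask, uintptr_t basebits, EEL_F value)
+    {
+      uintptr_t p = (uintptr_t)*stacktop + sizeof(EEL_F); // advance one 8-byte slot
+      p = (p & wrapmask) | basebits;                      // wrap within the ring
+      *(EEL_F *)p = value;
+      *stacktop = (EEL_F *)p;
+    }
+*/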
+
+
+
+void nseel_asm_stack_pop(void)
+{
+#ifdef TARGET_X64
+
+ __asm__(
+ FUNCTION_MARKER
+ "movl $0xfefefefe, %rdi\n"
+ "movll (%rdi), %rcx\n"
+ "movq (%rcx), %xmm0\n"
+ "subll $8, %rcx\n"
+ "movl $0xFEFEFEFE, %rdx\n"
+ "andll %rdx, %rcx\n"
+ "movl $0xFEFEFEFE, %rdx\n"
+ "orll %rdx, %rcx\n"
+ "movll %rcx, (%rdi)\n"
+ "movq %xmm0, (%eax)\n"
+ FUNCTION_MARKER
+ );
+
+#else
+
+ __asm__(
+ FUNCTION_MARKER
+ "movl $0xfefefefe, %edi\n"
+ "movl (%edi), %ecx\n"
+ "fld" EEL_F_SUFFIX " (%ecx)\n"
+ "subl $8, %ecx\n"
+ "andl $0xfefefefe, %ecx\n"
+ "orl $0xfefefefe, %ecx\n"
+ "movl %ecx, (%edi)\n"
+ "fstp" EEL_F_SUFFIX " (%eax)\n"
+ FUNCTION_MARKER
+ );
+
+#endif
+}
+void nseel_asm_stack_pop_end(void) {}
+
+
+void nseel_asm_stack_pop_fast(void)
+{
+#ifdef TARGET_X64
+
+ __asm__(
+ FUNCTION_MARKER
+ "movl $0xfefefefe, %rdi\n"
+ "movll (%rdi), %rcx\n"
+ "movll %rcx, %rax\n"
+ "subll $8, %rcx\n"
+ "movl $0xFEFEFEFE, %rdx\n"
+ "andll %rdx, %rcx\n"
+ "movl $0xFEFEFEFE, %rdx\n"
+ "orll %rdx, %rcx\n"
+ "movll %rcx, (%rdi)\n"
+ FUNCTION_MARKER
+ );
+
+#else
+
+ __asm__(
+ FUNCTION_MARKER
+ "movl $0xfefefefe, %edi\n"
+ "movl (%edi), %ecx\n"
+ "movl %ecx, %eax\n"
+ "subl $8, %ecx\n"
+ "andl $0xfefefefe, %ecx\n"
+ "orl $0xfefefefe, %ecx\n"
+ "movl %ecx, (%edi)\n"
+ FUNCTION_MARKER
+ );
+
+#endif
+}
+void nseel_asm_stack_pop_fast_end(void) {}
+
+void nseel_asm_stack_peek_int(void)
+{
+#ifdef TARGET_X64
+
+ __asm__(
+ FUNCTION_MARKER
+ "movll $0xfefefefe, %rdi\n"
+ "movll (%rdi), %rax\n"
+ "movl $0xfefefefe, %rdx\n"
+ "subll %rdx, %rax\n"
+ "movl $0xFEFEFEFE, %rdx\n"
+ "andll %rdx, %rax\n"
+ "movl $0xFEFEFEFE, %rdx\n"
+ "orll %rdx, %rax\n"
+ FUNCTION_MARKER
+ );
+
+#else
+
+ __asm__(
+ FUNCTION_MARKER
+ "movl $0xfefefefe, %edi\n"
+ "movl (%edi), %eax\n"
+ "movl $0xfefefefe, %edx\n"
+ "subl %edx, %eax\n"
+ "andl $0xfefefefe, %eax\n"
+ "orl $0xfefefefe, %eax\n"
+ FUNCTION_MARKER
+ );
+
+#endif
+
+}
+void nseel_asm_stack_peek_int_end(void) {}
+
+
+
+void nseel_asm_stack_peek(void)
+{
+#ifdef TARGET_X64
+
+ __asm__(
+ FUNCTION_MARKER
+ "movll $0xfefefefe, %rdi\n"
+ "fistpl (%rsi)\n"
+ "movll (%rdi), %rax\n"
+ "movll (%rsi), %rdx\n"
+ "shll $3, %rdx\n" // log2(sizeof(EEL_F))
+ "subl %rdx, %rax\n"
+ "movl $0xFEFEFEFE, %rdx\n"
+ "andll %rdx, %rax\n"
+ "movl $0xFEFEFEFE, %rdx\n"
+ "orll %rdx, %rax\n"
+ FUNCTION_MARKER
+ );
+
+#else
+
+ __asm__(
+ FUNCTION_MARKER
+ "movl $0xfefefefe, %edi\n"
+ "fistpl (%esi)\n"
+ "movl (%edi), %eax\n"
+ "movl (%esi), %edx\n"
+ "shll $3, %edx\n" // log2(sizeof(EEL_F))
+ "subl %edx, %eax\n"
+ "andl $0xfefefefe, %eax\n"
+ "orl $0xfefefefe, %eax\n"
+ FUNCTION_MARKER
+ );
+
+#endif
+
+}
+void nseel_asm_stack_peek_end(void) {}
+
+
+void nseel_asm_stack_peek_top(void)
+{
+#ifdef TARGET_X64
+
+ __asm__(
+ FUNCTION_MARKER
+ "movll $0xfefefefe, %rdi\n"
+ "movll (%rdi), %rax\n"
+ FUNCTION_MARKER
+ );
+
+#else
+
+ __asm__(
+ FUNCTION_MARKER
+ "movl $0xfefefefe, %edi\n"
+ "movl (%edi), %eax\n"
+ FUNCTION_MARKER
+ );
+
+#endif
+
+}
+void nseel_asm_stack_peek_top_end(void) {}
+
+void nseel_asm_stack_exch(void)
+{
+#ifdef TARGET_X64
+
+ __asm__(
+ FUNCTION_MARKER
+ "movll $0xfefefefe, %rdi\n"
+ "movll (%rdi), %rcx\n"
+ "movq (%rcx), %xmm0\n"
+ "movq (%rax), %xmm1\n"
+ "movq %xmm0, (%rax)\n"
+ "movq %xmm1, (%rcx)\n"
+ FUNCTION_MARKER
+ );
+
+#else
+
+ __asm__(
+ FUNCTION_MARKER
+ "movl $0xfefefefe, %edi\n"
+ "movl (%edi), %ecx\n"
+ "fld" EEL_F_SUFFIX " (%ecx)\n"
+ "fld" EEL_F_SUFFIX " (%eax)\n"
+ "fstp" EEL_F_SUFFIX " (%ecx)\n"
+ "fstp" EEL_F_SUFFIX " (%eax)\n"
+ FUNCTION_MARKER
+ );
+
+#endif
+
+}
+void nseel_asm_stack_exch_end(void) {}
+
+#ifdef TARGET_X64
+void eel_callcode64()
+{
+ __asm__(
+#ifndef EEL_X64_NO_CHANGE_FPFLAGS
+ "subl $16, %rsp\n"
+ "fnstcw (%rsp)\n"
+ "mov (%rsp), %ax\n"
+ "or $0xE3F, %ax\n" // 53 or 64 bit precision, trunc, and masking all exceptions
+ "mov %ax, 4(%rsp)\n"
+ "fldcw 4(%rsp)\n"
+#endif
+ "push %rbx\n"
+ "push %rbp\n"
+ "push %r12\n"
+ "push %r13\n"
+ "push %r14\n"
+ "push %r15\n"
+
+#ifdef AMD64ABI
+ "movll %rsi, %r12\n" // second parameter is ram-blocks pointer
+ "call %rdi\n"
+#else
+ "push %rdi\n"
+ "push %rsi\n"
+ "movll %rdx, %r12\n" // second parameter is ram-blocks pointer
+ "call %rcx\n"
+ "pop %rsi\n"
+ "pop %rdi\n"
+#endif
+
+ "fclex\n"
+
+ "pop %r15\n"
+ "pop %r14\n"
+ "pop %r13\n"
+ "pop %r12\n"
+ "pop %rbp\n"
+ "pop %rbx\n"
+
+#ifndef EEL_X64_NO_CHANGE_FPFLAGS
+ "fldcw (%rsp)\n"
+ "addl $16, %rsp\n"
+#endif
+
+ "ret\n"
+ );
+}
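+
+/*
+  The control-word bits forced above, for reference (a sketch using GCC
+  constraint syntax rather than the raw stack juggling in the stub):
+
+    unsigned short fcw;
+    __asm__ __volatile__("fnstcw %0" : "=m"(fcw));
+    fcw |= 0x0E3F;   // 0x003F: mask all exceptions, 0x0200: 53- or 64-bit precision,
+                     // 0x0C00: round toward zero (truncate)
+    __asm__ __volatile__("fldcw %0" : : "m"(fcw));
+*/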
+
+void eel_callcode64_fast()
+{
+ __asm__(
+ "push %rbx\n"
+ "push %rbp\n"
+ "push %r12\n"
+ "push %r13\n"
+ "push %r14\n"
+ "push %r15\n"
+
+#ifdef AMD64ABI
+ "movll %rsi, %r12\n" // second parameter is ram-blocks pointer
+ "call %rdi\n"
+#else
+ "push %rdi\n"
+ "push %rsi\n"
+ "movll %rdx, %r12\n" // second parameter is ram-blocks pointer
+ "call %rcx\n"
+ "pop %rsi\n"
+ "pop %rdi\n"
+#endif
+
+ "pop %r15\n"
+ "pop %r14\n"
+ "pop %r13\n"
+ "pop %r12\n"
+ "pop %rbp\n"
+ "pop %rbx\n"
+
+ "ret\n"
+ );
+}
+
+void eel_setfp_round()
+{
+ __asm__(
+#ifndef EEL_X64_NO_CHANGE_FPFLAGS
+ "subl $16, %rsp\n"
+ "fnstcw (%rsp)\n"
+ "mov (%rsp), %ax\n"
+ "and $0xF3FF, %ax\n" // set round to nearest
+ "mov %ax, 4(%rsp)\n"
+ "fldcw 4(%rsp)\n"
+ "addl $16, %rsp\n"
+#endif
+ "ret\n"
+ );
+}
+
+void eel_setfp_trunc()
+{
+ __asm__(
+#ifndef EEL_X64_NO_CHANGE_FPFLAGS
+ "subl $16, %rsp\n"
+ "fnstcw (%rsp)\n"
+ "mov (%rsp), %ax\n"
+ "or $0xC00, %ax\n" // set to truncate
+ "mov %ax, 4(%rsp)\n"
+ "fldcw 4(%rsp)\n"
+ "addl $16, %rsp\n"
+#endif
+ "ret\n"
+ );
+}
+
+void eel_enterfp(int s[2])
+{
+ __asm__(
+#ifdef AMD64ABI
+ "fnstcw (%rdi)\n"
+ "mov (%rdi), %ax\n"
+ "or $0xE3F, %ax\n" // 53 or 64 bit precision, trunc, and masking all exceptions
+ "mov %ax, 4(%rdi)\n"
+ "fldcw 4(%rdi)\n"
+#else
+ "fnstcw (%rcx)\n"
+ "mov (%rcx), %ax\n"
+ "or $0xE3F, %ax\n" // 53 or 64 bit precision, trunc, and masking all exceptions
+ "mov %ax, 4(%rcx)\n"
+ "fldcw 4(%rcx)\n"
+#endif
+ "ret\n"
+ );
+}
+void eel_leavefp(int s[2])
+{
+ __asm__(
+#ifdef AMD64ABI
+ "fldcw (%rdi)\n"
+#else
+ "fldcw (%rcx)\n"
+#endif
+ "ret\n";
+ );
+}
+
+#endif