Diffstat (limited to 'Src/ns-eel2/asm-nseel-x86-gcc.c')
-rw-r--r-- | Src/ns-eel2/asm-nseel-x86-gcc.c | 2153 |
1 files changed, 2153 insertions, 0 deletions
diff --git a/Src/ns-eel2/asm-nseel-x86-gcc.c b/Src/ns-eel2/asm-nseel-x86-gcc.c new file mode 100644 index 00000000..5c3d747e --- /dev/null +++ b/Src/ns-eel2/asm-nseel-x86-gcc.c @@ -0,0 +1,2153 @@ +/* note: only EEL_F_SIZE=8 is now supported (no float EEL_F's) */ + +#ifndef AMD64ABI +#define X64_EXTRA_STACK_SPACE 32 // win32 requires allocating space for 4 parameters at 8 bytes each, even though we pass via register +#endif + +void nseel_asm_1pdd(void) +{ + __asm__( + + FUNCTION_MARKER + + "movl $0xfefefefe, %edi\n" +#ifdef TARGET_X64 + "fstpl (%rsi)\n" + "movq (%rsi), %xmm0\n" + #ifdef AMD64ABI + "movl %rsi, %r15\n" + "call *%edi\n" + "movl %r15, %rsi\n" + #else + "subl X64_EXTRA_STACK_SPACE, %rsp\n" + "call *%edi\n" + "addl X64_EXTRA_STACK_SPACE, %rsp\n" + #endif + "movq xmm0, (%rsi)\n" + "fldl (%rsi)\n" +#else + "subl $16, %esp\n" + "fstpl (%esp)\n" + "call *%edi\n" + "addl $16, %esp\n" +#endif + + FUNCTION_MARKER + + ); +} +void nseel_asm_1pdd_end(void){} + +void nseel_asm_2pdd(void) +{ + __asm__( + FUNCTION_MARKER + + "movl $0xfefefefe, %edi\n" +#ifdef TARGET_X64 + "fstpl 8(%rsi)\n" + "fstpl (%rsi)\n" + "movq 8(%rsi), %xmm1\n" + "movq (%rsi), %xmm0\n" + #ifdef AMD64ABI + "movl %rsi, %r15\n" + "call *%edi\n" + "movl %r15, %rsi\n" + #else + "subl X64_EXTRA_STACK_SPACE, %rsp\n" + "call *%edi\n" + "addl X64_EXTRA_STACK_SPACE, %rsp\n" + #endif + "movq xmm0, (%rsi)\n" + "fldl (%rsi)\n" +#else + "subl $16, %esp\n" + "fstpl 8(%esp)\n" + "fstpl (%esp)\n" + "call *%edi\n" + "addl $16, %esp\n" +#endif + + FUNCTION_MARKER + ); +} +void nseel_asm_2pdd_end(void){} + +void nseel_asm_2pdds(void) +{ + __asm__( + FUNCTION_MARKER + + "movl $0xfefefefe, %eax\n" +#ifdef TARGET_X64 + "fstpl (%rsi)\n" + "movq (%rdi), %xmm0\n" + "movq (%rsi), %xmm1\n" + #ifdef AMD64ABI + "movl %rsi, %r15\n" + "movl %rdi, %r14\n" + "call *%eax\n" + "movl %r14, %rdi\n" /* restore thrashed rdi */ + "movl %r15, %rsi\n" + "movl %r14, %rax\n" /* set return value */ + "movq xmm0, (%r14)\n" + #else + "subl X64_EXTRA_STACK_SPACE, %rsp\n" + "call *%eax\n" + "movq xmm0, (%edi)\n" + "movl %edi, %eax\n" /* set return value */ + "addl X64_EXTRA_STACK_SPACE, %rsp\n" + #endif +#else + "subl $8, %esp\n" + "fstpl (%esp)\n" + "pushl 4(%edi)\n" /* push parameter */ + "pushl (%edi)\n" /* push the rest of the parameter */ + "call *%eax\n" + "addl $16, %esp\n" + "fstpl (%edi)\n" /* store result */ + "movl %edi, %eax\n" /* set return value */ +#endif + + // denormal-fix result (this is only currently used for pow_op, so we want this!) 
+ "movl 4(%edi), %edx\n" + "addl $0x00100000, %edx\n" + "andl $0x7FF00000, %edx\n" + "cmpl $0x00200000, %edx\n" + "jg 0f\n" + "subl %edx, %edx\n" +#ifdef TARGET_X64 + "movll %rdx, (%rdi)\n" +#else + "movl %edx, (%edi)\n" + "movl %edx, 4(%edi)\n" +#endif + "0:\n" + + FUNCTION_MARKER + + ); +} +void nseel_asm_2pdds_end(void){} + + + +//--------------------------------------------------------------------------------------------------------------- + + +// do nothing, eh +void nseel_asm_exec2(void) +{ + __asm__( + FUNCTION_MARKER + "" + FUNCTION_MARKER + ); +} +void nseel_asm_exec2_end(void) { } + + + +void nseel_asm_invsqrt(void) +{ + __asm__( + FUNCTION_MARKER + "movl $0x5f3759df, %edx\n" + "fsts (%esi)\n" +#ifdef TARGET_X64 + "movl 0xfefefefe, %rax\n" + "fmul" EEL_F_SUFFIX " (%rax)\n" + "movsxl (%esi), %rcx\n" +#else + "fmul" EEL_F_SUFFIX " (0xfefefefe)\n" + "movl (%esi), %ecx\n" +#endif + "sarl $1, %ecx\n" + "subl %ecx, %edx\n" + "movl %edx, (%esi)\n" + "fmuls (%esi)\n" + "fmuls (%esi)\n" +#ifdef TARGET_X64 + "movl 0xfefefefe, %rax\n" + "fadd" EEL_F_SUFFIX " (%rax)\n" +#else + "fadd" EEL_F_SUFFIX " (0xfefefefe)\n" +#endif + "fmuls (%esi)\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_invsqrt_end(void) {} + + +void nseel_asm_dbg_getstackptr(void) +{ + __asm__( + FUNCTION_MARKER +#ifdef __clang__ + "ffree %st(0)\n" +#else + "fstpl %st(0)\n" +#endif + "movl %esp, (%esi)\n" + "fildl (%esi)\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_dbg_getstackptr_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_sin(void) +{ + __asm__( + FUNCTION_MARKER + "fsin\n" + FUNCTION_MARKER + ); +} +void nseel_asm_sin_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_cos(void) +{ + __asm__( + FUNCTION_MARKER + "fcos\n" + FUNCTION_MARKER + ); +} +void nseel_asm_cos_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_tan(void) +{ + __asm__( + FUNCTION_MARKER + "fptan\n" + "fstp %st(0)\n" + FUNCTION_MARKER + ); +} +void nseel_asm_tan_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_sqr(void) +{ + __asm__( + FUNCTION_MARKER + "fmul %st(0), %st(0)\n" + FUNCTION_MARKER + ); +} +void nseel_asm_sqr_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_sqrt(void) +{ + __asm__( + FUNCTION_MARKER + "fabs\n" + "fsqrt\n" + FUNCTION_MARKER + ); +} +void nseel_asm_sqrt_end(void) {} + + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_log(void) +{ + __asm__( + FUNCTION_MARKER + "fldln2\n" + "fxch\n" + "fyl2x\n" + FUNCTION_MARKER + ); +} +void nseel_asm_log_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_log10(void) +{ + __asm__( + FUNCTION_MARKER + "fldlg2\n" + "fxch\n" + "fyl2x\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_log10_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_abs(void) +{ + __asm__( + FUNCTION_MARKER + "fabs\n" + FUNCTION_MARKER + ); +} +void nseel_asm_abs_end(void) {} + + 
+//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_assign(void) +{ +#ifdef TARGET_X64 + + __asm__( + FUNCTION_MARKER + "movll (%rax), %rdx\n" + "movll %rdx, %rcx\n" + "shrl $32, %rdx\n" + "addl $0x00100000, %edx\n" + "andl $0x7FF00000, %edx\n" + "cmpl $0x00200000, %edx\n" + "movll %rdi, %rax\n" + "jg 0f\n" + "subl %ecx, %ecx\n" + "0:\n" + "movll %rcx, (%edi)\n" + + FUNCTION_MARKER + ); + +#else + + __asm__( + FUNCTION_MARKER + "movl (%eax), %ecx\n" + "movl 4(%eax), %edx\n" + "movl %edx, %eax\n" + "addl $0x00100000, %eax\n" // if exponent is zero, make exponent 0x7ff, if 7ff, make 7fe + "andl $0x7ff00000, %eax\n" + "cmpl $0x00200000, %eax\n" + "jg 0f\n" + "subl %ecx, %ecx\n" + "subl %edx, %edx\n" + "0:\n" + "movl %edi, %eax\n" + "movl %ecx, (%edi)\n" + "movl %edx, 4(%edi)\n" + + FUNCTION_MARKER + ); + +#endif +} +void nseel_asm_assign_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_assign_fromfp(void) +{ + __asm__( + FUNCTION_MARKER + "fstpl (%edi)\n" + "movl 4(%edi), %edx\n" + "addl $0x00100000, %edx\n" + "andl $0x7FF00000, %edx\n" + "cmpl $0x00200000, %edx\n" + "movl %edi, %eax\n" + "jg 0f\n" + "subl %edx, %edx\n" +#ifdef TARGET_X64 + "movll %rdx, (%rdi)\n" +#else + "movl %edx, (%edi)\n" + "movl %edx, 4(%edi)\n" +#endif + "0:\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_assign_fromfp_end(void) {} + + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_assign_fast_fromfp(void) +{ + __asm__( + FUNCTION_MARKER + "movl %edi, %eax\n" + "fstpl (%edi)\n" + FUNCTION_MARKER + ); +} +void nseel_asm_assign_fast_fromfp_end(void) {} + + + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_assign_fast(void) +{ +#ifdef TARGET_X64 + + __asm__( + FUNCTION_MARKER + "movll (%rax), %rdx\n" + "movll %rdx, (%edi)\n" + "movll %rdi, %rax\n" + FUNCTION_MARKER + ); + +#else + + __asm__( + FUNCTION_MARKER + "movl (%eax), %ecx\n" + "movl %ecx, (%edi)\n" + "movl 4(%eax), %ecx\n" + + "movl %edi, %eax\n" + "movl %ecx, 4(%edi)\n" + FUNCTION_MARKER + ); + +#endif +} +void nseel_asm_assign_fast_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_add(void) +{ + __asm__( + FUNCTION_MARKER +#ifdef __clang__ + "faddp %st(1)\n" +#else + "fadd\n" +#endif + FUNCTION_MARKER + ); +} +void nseel_asm_add_end(void) {} + +void nseel_asm_add_op(void) +{ + __asm__( + FUNCTION_MARKER + "fadd" EEL_F_SUFFIX " (%edi)\n" + "movl %edi, %eax\n" + "fstp" EEL_F_SUFFIX " (%edi)\n" + + "movl 4(%edi), %edx\n" + "addl $0x00100000, %edx\n" + "andl $0x7FF00000, %edx\n" + "cmpl $0x00200000, %edx\n" + "jg 0f\n" + "subl %edx, %edx\n" +#ifdef TARGET_X64 + "movll %rdx, (%rdi)\n" +#else + "movl %edx, (%edi)\n" + "movl %edx, 4(%edi)\n" +#endif + "0:\n" + FUNCTION_MARKER + ); +} +void nseel_asm_add_op_end(void) {} + +void nseel_asm_add_op_fast(void) +{ + __asm__( + FUNCTION_MARKER + "fadd" EEL_F_SUFFIX " (%edi)\n" + "movl %edi, %eax\n" + "fstp" EEL_F_SUFFIX " (%edi)\n" + FUNCTION_MARKER + ); +} +void nseel_asm_add_op_fast_end(void) {} + + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_sub(void) +{ + __asm__( + FUNCTION_MARKER +#ifdef __clang__ + 
"fsubrp %st(0), %st(1)\n" +#else + #ifdef __GNUC__ + #ifdef __INTEL_COMPILER + "fsub\n" + #else + "fsubr\n" // gnuc has fsub/fsubr backwards, ack + #endif + #else + "fsub\n" + #endif +#endif + FUNCTION_MARKER + ); +} +void nseel_asm_sub_end(void) {} + +void nseel_asm_sub_op(void) +{ + __asm__( + FUNCTION_MARKER + "fsubr" EEL_F_SUFFIX " (%edi)\n" + "movl %edi, %eax\n" + "fstp" EEL_F_SUFFIX " (%edi)\n" + + "movl 4(%edi), %edx\n" + "addl $0x00100000, %edx\n" + "andl $0x7FF00000, %edx\n" + "cmpl $0x00200000, %edx\n" + "jg 0f\n" + "subl %edx, %edx\n" +#ifdef TARGET_X64 + "movll %rdx, (%rdi)\n" +#else + "movl %edx, (%edi)\n" + "movl %edx, 4(%edi)\n" +#endif + "0:\n" + FUNCTION_MARKER + ); +} +void nseel_asm_sub_op_end(void) {} + +void nseel_asm_sub_op_fast(void) +{ + __asm__( + FUNCTION_MARKER + "fsubr" EEL_F_SUFFIX " (%edi)\n" + "movl %edi, %eax\n" + "fstp" EEL_F_SUFFIX " (%edi)\n" + FUNCTION_MARKER + ); +} +void nseel_asm_sub_op_fast_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_mul(void) +{ + __asm__( + FUNCTION_MARKER +#ifdef __clang__ + "fmulp %st(0), %st(1)\n" +#else + "fmul\n" +#endif + FUNCTION_MARKER + ); +} +void nseel_asm_mul_end(void) {} + +void nseel_asm_mul_op(void) +{ + __asm__( + FUNCTION_MARKER + "fmul" EEL_F_SUFFIX " (%edi)\n" + "movl %edi, %eax\n" + "fstp" EEL_F_SUFFIX " (%edi)\n" + + "movl 4(%edi), %edx\n" + "addl $0x00100000, %edx\n" + "andl $0x7FF00000, %edx\n" + "cmpl $0x00200000, %edx\n" + "jg 0f\n" + "subl %edx, %edx\n" +#ifdef TARGET_X64 + "movll %rdx, (%rdi)\n" +#else + "movl %edx, (%edi)\n" + "movl %edx, 4(%edi)\n" +#endif + "0:\n" + FUNCTION_MARKER + ); +} +void nseel_asm_mul_op_end(void) {} + +void nseel_asm_mul_op_fast(void) +{ + __asm__( + FUNCTION_MARKER + "fmul" EEL_F_SUFFIX " (%edi)\n" + "movl %edi, %eax\n" + "fstp" EEL_F_SUFFIX " (%edi)\n" + FUNCTION_MARKER + ); +} +void nseel_asm_mul_op_fast_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_div(void) +{ + __asm__( + FUNCTION_MARKER +#ifdef __clang__ + "fdivrp %st(1)\n" +#else + #ifdef __GNUC__ + #ifdef __INTEL_COMPILER + "fdiv\n" + #else + "fdivr\n" // gcc inline asm seems to have fdiv/fdivr backwards + #endif + #else + "fdiv\n" + #endif +#endif + FUNCTION_MARKER + ); +} +void nseel_asm_div_end(void) {} + +void nseel_asm_div_op(void) +{ + __asm__( + FUNCTION_MARKER + "fld" EEL_F_SUFFIX " (%edi)\n" +#ifdef __clang__ + "fdivp %st(1)\n" +#else + #ifndef __GNUC__ + "fdivr\n" + #else + #ifdef __INTEL_COMPILER + "fdivp %st(1)\n" + #else + "fdiv\n" + #endif + #endif +#endif + "movl %edi, %eax\n" + "fstp" EEL_F_SUFFIX " (%edi)\n" + + "movl 4(%edi), %edx\n" + "addl $0x00100000, %edx\n" + "andl $0x7FF00000, %edx\n" + "cmpl $0x00200000, %edx\n" + "jg 0f\n" + "subl %edx, %edx\n" +#ifdef TARGET_X64 + "movll %rdx, (%rdi)\n" +#else + "movl %edx, (%edi)\n" + "movl %edx, 4(%edi)\n" +#endif + "0:\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_div_op_end(void) {} + +void nseel_asm_div_op_fast(void) +{ + __asm__( + FUNCTION_MARKER + "fld" EEL_F_SUFFIX " (%edi)\n" +#ifdef __clang__ + "fdivp %st(1)\n" +#else + #ifndef __GNUC__ + "fdivr\n" + #else + #ifdef __INTEL_COMPILER + "fdivp %st(1)\n" + #else + "fdiv\n" + #endif + #endif +#endif + "movl %edi, %eax\n" + "fstp" EEL_F_SUFFIX " (%edi)\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_div_op_fast_end(void) {} + 
+//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_mod(void) +{ + __asm__( + FUNCTION_MARKER + "fabs\n" + "fistpl (%esi)\n" + "fabs\n" + "fistpl 4(%esi)\n" + "xorl %edx, %edx\n" + "cmpl $0, (%esi)\n" + "je 0f\n" // skip devide, set return to 0 + "movl 4(%esi), %eax\n" + "divl (%esi)\n" + "0:\n" + "movl %edx, (%esi)\n" + "fildl (%esi)\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_mod_end(void) {} + +void nseel_asm_shl(void) +{ + __asm__( + FUNCTION_MARKER + "fistpl (%esi)\n" + "fistpl 4(%esi)\n" + "movl (%esi), %ecx\n" + "movl 4(%esi), %eax\n" + "shll %cl, %eax\n" + "movl %eax, (%esi)\n" + "fildl (%esi)\n" + FUNCTION_MARKER + ); +} +void nseel_asm_shl_end(void) {} + +void nseel_asm_shr(void) +{ + __asm__( + FUNCTION_MARKER + "fistpl (%esi)\n" + "fistpl 4(%esi)\n" + "movl (%esi), %ecx\n" + "movl 4(%esi), %eax\n" + "sarl %cl, %eax\n" + "movl %eax, (%esi)\n" + "fildl (%esi)\n" + FUNCTION_MARKER + ); +} +void nseel_asm_shr_end(void) {} + + +void nseel_asm_mod_op(void) +{ + __asm__( + FUNCTION_MARKER + "fld" EEL_F_SUFFIX " (%edi)\n" + "fxch\n" + "fabs\n" + "fistpl (%edi)\n" + "fabs\n" + "fistpl (%esi)\n" + "xorl %edx, %edx\n" + "cmpl $0, (%edi)\n" + "je 0f\n" // skip devide, set return to 0 + "movl (%esi), %eax\n" + "divl (%edi)\n" + "0:\n" + "movl %edx, (%edi)\n" + "fildl (%edi)\n" + "movl %edi, %eax\n" + "fstp" EEL_F_SUFFIX " (%edi)\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_mod_op_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_or(void) +{ + __asm__( + FUNCTION_MARKER + "fistpll (%esi)\n" + "fistpll 8(%esi)\n" +#ifdef TARGET_X64 + "movll 8(%rsi), %rdi\n" + "orll %rdi, (%rsi)\n" +#else + "movl 8(%esi), %edi\n" + "movl 12(%esi), %ecx\n" + "orl %edi, (%esi)\n" + "orl %ecx, 4(%esi)\n" +#endif + "fildll (%esi)\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_or_end(void) {} + +void nseel_asm_or0(void) +{ + __asm__( + FUNCTION_MARKER + "fistpll (%esi)\n" + "fildll (%esi)\n" + FUNCTION_MARKER + ); +} +void nseel_asm_or0_end(void) {} + +void nseel_asm_or_op(void) +{ + __asm__( + FUNCTION_MARKER + "fld" EEL_F_SUFFIX " (%edi)\n" + "fxch\n" + "fistpll (%edi)\n" + "fistpll (%esi)\n" +#ifdef TARGET_X64 + "movll (%rsi), %rax\n" + "orll %rax, (%rdi)\n" +#else + "movl (%esi), %eax\n" + "movl 4(%esi), %ecx\n" + "orl %eax, (%edi)\n" + "orl %ecx, 4(%edi)\n" +#endif + "fildll (%edi)\n" + "movl %edi, %eax\n" + "fstp" EEL_F_SUFFIX " (%edi)\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_or_op_end(void) {} + + +void nseel_asm_xor(void) +{ + __asm__( + FUNCTION_MARKER + "fistpll (%esi)\n" + "fistpll 8(%esi)\n" +#ifdef TARGET_X64 + "movll 8(%rsi), %rdi\n" + "xorll %rdi, (%rsi)\n" +#else + "movl 8(%esi), %edi\n" + "movl 12(%esi), %ecx\n" + "xorl %edi, (%esi)\n" + "xorl %ecx, 4(%esi)\n" +#endif + "fildll (%esi)\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_xor_end(void) {} + +void nseel_asm_xor_op(void) +{ + __asm__( + FUNCTION_MARKER + "fld" EEL_F_SUFFIX " (%edi)\n" + "fxch\n" + "fistpll (%edi)\n" + "fistpll (%esi)\n" +#ifdef TARGET_X64 + "movll (%rsi), %rax\n" + "xorll %rax, (%rdi)\n" +#else + "movl (%esi), %eax\n" + "movl 4(%esi), %ecx\n" + "xorl %eax, (%edi)\n" + "xorl %ecx, 4(%edi)\n" +#endif + "fildll (%edi)\n" + "movl %edi, %eax\n" + "fstp" EEL_F_SUFFIX " (%edi)\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_xor_op_end(void) {} + + 
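The integer and bitwise stubs in this stretch round their operands to integers with fistpl/fistpll before operating on the bit patterns. nseel_asm_mod, for example, takes absolute values, performs an unsigned 32-bit divide, and returns 0 instead of faulting on a zero divisor. A rough C model (which x87 stack slot maps to the dividend versus the divisor depends on the surrounding code generator, so the parameter order here is an assumption):

#include <math.h>

/* Approximate semantics of nseel_asm_mod: non-negative integer remainder,
   with a zero divisor short-circuiting to 0 ("je 0f" skips the divide). */
static double eel_mod(double dividend, double divisor)
{
  unsigned int ia = (unsigned int)fabs(dividend);   /* "fabs" + "fistpl"          */
  unsigned int ib = (unsigned int)fabs(divisor);
  if (ib == 0) return 0.0;                          /* avoid the divide fault     */
  return (double)(ia % ib);                         /* "divl" leaves this in %edx */
}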
+//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_and(void) +{ + __asm__( + FUNCTION_MARKER + "fistpll (%esi)\n" + "fistpll 8(%esi)\n" +#ifdef TARGET_X64 + "movll 8(%rsi), %rdi\n" + "andll %rdi, (%rsi)\n" +#else + "movl 8(%esi), %edi\n" + "movl 12(%esi), %ecx\n" + "andl %edi, (%esi)\n" + "andl %ecx, 4(%esi)\n" +#endif + "fildll (%esi)\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_and_end(void) {} + +void nseel_asm_and_op(void) +{ + __asm__( + FUNCTION_MARKER + "fld" EEL_F_SUFFIX " (%edi)\n" + "fxch\n" + "fistpll (%edi)\n" + "fistpll (%esi)\n" +#ifdef TARGET_X64 + "movll (%rsi), %rax\n" + "andll %rax, (%rdi)\n" +#else + "movl (%esi), %eax\n" + "movl 4(%esi), %ecx\n" + "andl %eax, (%edi)\n" + "andl %ecx, 4(%edi)\n" +#endif + "fildll (%edi)\n" + "movl %edi, %eax\n" + "fstp" EEL_F_SUFFIX " (%edi)\n" + + FUNCTION_MARKER + ); +} +void nseel_asm_and_op_end(void) {} + + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_uplus(void) // this is the same as doing nothing, it seems +{ + __asm__( + FUNCTION_MARKER + "" + FUNCTION_MARKER + ); +} +void nseel_asm_uplus_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_uminus(void) +{ + __asm__( + FUNCTION_MARKER + "fchs\n" + FUNCTION_MARKER + ); +} +void nseel_asm_uminus_end(void) {} + + + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_sign(void) +{ + __asm__( + FUNCTION_MARKER + +#ifdef TARGET_X64 + + + "fst" EEL_F_SUFFIX " (%rsi)\n" + "mov" EEL_F_SUFFIX " (%rsi), %rdx\n" + "movll $0x7FFFFFFFFFFFFFFF, %rcx\n" + "testll %rcx, %rdx\n" + "jz 0f\n" // zero zero, return the value passed directly + // calculate sign + "incll %rcx\n" // rcx becomes 0x80000... + "fstp %st(0)\n" + "fld1\n" + "testl %rcx, %rdx\n" + "jz 0f\n" + "fchs\n" + "0:\n" + +#else + + "fsts (%esi)\n" + "movl (%esi), %ecx\n" + "movl $0x7FFFFFFF, %edx\n" + "testl %edx, %ecx\n" + "jz 0f\n" // zero zero, return the value passed directly + // calculate sign + "incl %edx\n" // edx becomes 0x8000... 
+ "fstp %st(0)\n" + "fld1\n" + "testl %edx, %ecx\n" + "jz 0f\n" + "fchs\n" + "0:\n" + +#endif + FUNCTION_MARKER +); +} +void nseel_asm_sign_end(void) {} + + + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_bnot(void) +{ + __asm__( + FUNCTION_MARKER + "testl %eax, %eax\n" + "setz %al\n" + "andl $0xff, %eax\n" + FUNCTION_MARKER + ); +} +void nseel_asm_bnot_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_fcall(void) +{ + __asm__( + FUNCTION_MARKER + "movl $0xfefefefe, %edx\n" +#ifdef TARGET_X64 + "subl $8, %esp\n" + "call *%edx\n" + "addl $8, %esp\n" +#else + "subl $12, %esp\n" /* keep stack 16 byte aligned, 4 bytes for return address */ + "call *%edx\n" + "addl $12, %esp\n" +#endif + FUNCTION_MARKER + ); +} +void nseel_asm_fcall_end(void) {} + +void nseel_asm_band(void) +{ + __asm__( + FUNCTION_MARKER + "testl %eax, %eax\n" + "jz 0f\n" + + "movl $0xfefefefe, %ecx\n" +#ifdef TARGET_X64 + "subl $8, %rsp\n" +#else + "subl $12, %esp\n" +#endif + "call *%ecx\n" +#ifdef TARGET_X64 + "addl $8, %rsp\n" +#else + "addl $12, %esp\n" +#endif + "0:\n" + FUNCTION_MARKER + ); +} +void nseel_asm_band_end(void) {} + +void nseel_asm_bor(void) +{ + __asm__( + FUNCTION_MARKER + "testl %eax, %eax\n" + "jnz 0f\n" + + "movl $0xfefefefe, %ecx\n" +#ifdef TARGET_X64 + "subl $8, %rsp\n" +#else + "subl $12, %esp\n" +#endif + "call *%ecx\n" +#ifdef TARGET_X64 + "addl $8, %rsp\n" +#else + "addl $12, %esp\n" +#endif + "0:\n" + FUNCTION_MARKER + ); +} +void nseel_asm_bor_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_equal(void) +{ + __asm__( + FUNCTION_MARKER +#ifdef __clang__ + "fsubp %st(1)\n" +#else + "fsub\n" +#endif + + "fabs\n" +#ifdef TARGET_X64 + "fcomp" EEL_F_SUFFIX " -8(%r12)\n" //[g_closefact] +#else + "fcomp" EEL_F_SUFFIX " -8(%ebx)\n" //[g_closefact] +#endif + "fstsw %ax\n" + "andl $256, %eax\n" // old behavior: if 256 set, true (NaN means true) + + FUNCTION_MARKER + ); +} +void nseel_asm_equal_end(void) {} +// +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_equal_exact(void) +{ + __asm__( + FUNCTION_MARKER + "fcompp\n" + "fstsw %ax\n" // for equal 256 and 1024 should be clear, 16384 should be set + "andl $17664, %eax\n" // mask C4/C3/C1, bits 8/10/14, 16384|256|1024 -- if equals 16384, then equality + "cmp $16384, %eax\n" + "je 0f\n" + "subl %eax, %eax\n" + "0:\n" + FUNCTION_MARKER + ); +} +void nseel_asm_equal_exact_end(void) {} + +void nseel_asm_notequal_exact(void) +{ + __asm__( + FUNCTION_MARKER + "fcompp\n" + "fstsw %ax\n" // for equal 256 and 1024 should be clear, 16384 should be set + "andl $17664, %eax\n" // mask C4/C3/C1, bits 8/10/14, 16384|256|1024 -- if equals 16384, then equality + "cmp $16384, %eax\n" + "je 0f\n" + "subl %eax, %eax\n" + "0:\n" + "xorl $16384, %eax\n" // flip the result + FUNCTION_MARKER + ); +} +void nseel_asm_notequal_exact_end(void) {} +// +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_notequal(void) +{ + __asm__( + FUNCTION_MARKER +#ifdef __clang__ + "fsubp %st(1)\n" +#else + "fsub\n" +#endif + + "fabs\n" +#ifdef TARGET_X64 + "fcomp" EEL_F_SUFFIX " -8(%r12)\n" //[g_closefact] +#else + "fcomp" EEL_F_SUFFIX " -8(%ebx)\n" //[g_closefact] 
+#endif + "fstsw %ax\n" + "andl $256, %eax\n" + "xorl $256, %eax\n" // old behavior: if 256 set, FALSE (NaN makes for false) + FUNCTION_MARKER + ); +} +void nseel_asm_notequal_end(void) {} + + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_above(void) +{ + __asm__( + FUNCTION_MARKER + "fcompp\n" + "fstsw %ax\n" + "andl $1280, %eax\n" // (1024+256) old behavior: NaN would mean 1, preserve that + FUNCTION_MARKER + ); +} +void nseel_asm_above_end(void) {} + +//--------------------------------------------------------------------------------------------------------------- +void nseel_asm_beloweq(void) +{ + __asm__( + FUNCTION_MARKER + "fcompp\n" + "fstsw %ax\n" + "andl $256, %eax\n" // old behavior: NaN would be 0 (ugh) + "xorl $256, %eax\n" + FUNCTION_MARKER + ); +} +void nseel_asm_beloweq_end(void) {} + + +void nseel_asm_booltofp(void) +{ + __asm__( + FUNCTION_MARKER + "testl %eax, %eax\n" + "jz 0f\n" + "fld1\n" + "jmp 1f\n" + "0:\n" + "fldz\n" + "1:\n" + FUNCTION_MARKER + ); +} +void nseel_asm_booltofp_end(void) {} + +void nseel_asm_fptobool(void) +{ + __asm__( + FUNCTION_MARKER + "fabs\n" +#ifdef TARGET_X64 + "fcomp" EEL_F_SUFFIX " -8(%r12)\n" //[g_closefact] +#else + "fcomp" EEL_F_SUFFIX " -8(%ebx)\n" //[g_closefact] +#endif + "fstsw %ax\n" + "andl $256, %eax\n" + "xorl $256, %eax\n" + FUNCTION_MARKER + ); +} +void nseel_asm_fptobool_end(void) {} + +void nseel_asm_fptobool_rev(void) +{ + __asm__( + FUNCTION_MARKER + "fabs\n" +#ifdef TARGET_X64 + "fcomp" EEL_F_SUFFIX " -8(%r12)\n" //[g_closefact] +#else + "fcomp" EEL_F_SUFFIX " -8(%ebx)\n" //[g_closefact] +#endif + "fstsw %ax\n" + "andl $256, %eax\n" + FUNCTION_MARKER + ); +} +void nseel_asm_fptobool_rev_end(void) {} + +void nseel_asm_min(void) +{ + __asm__( + FUNCTION_MARKER + "fld" EEL_F_SUFFIX " (%edi)\n" + "fcomp" EEL_F_SUFFIX " (%eax)\n" + "movl %eax, %ecx\n" + "fstsw %ax\n" + "testl $256, %eax\n" + "movl %ecx, %eax\n" + "jz 0f\n" + "movl %edi, %eax\n" + "0:\n" + FUNCTION_MARKER + ); + +} +void nseel_asm_min_end(void) {} + +void nseel_asm_max(void) +{ + __asm__( + FUNCTION_MARKER + "fld" EEL_F_SUFFIX " (%edi)\n" + "fcomp" EEL_F_SUFFIX " (%eax)\n" + "movl %eax, %ecx\n" + "fstsw %ax\n" + "testl $256, %eax\n" + "movl %ecx, %eax\n" + "jnz 0f\n" + "movl %edi, %eax\n" + "0:\n" + FUNCTION_MARKER + ); +} +void nseel_asm_max_end(void) {} + + + +void nseel_asm_min_fp(void) +{ + __asm__( + FUNCTION_MARKER + "fcom\n" + "fstsw %ax\n" + "testl $256, %eax\n" + "jz 0f\n" + "fxch\n" + "0:\n" + "fstp %st(0)\n" + FUNCTION_MARKER + ); + +} +void nseel_asm_min_fp_end(void) {} + +void nseel_asm_max_fp(void) +{ + __asm__( + FUNCTION_MARKER + "fcom\n" + "fstsw %ax\n" + "testl $256, %eax\n" + "jnz 0f\n" + "fxch\n" + "0:\n" + "fstp %st(0)\n" + FUNCTION_MARKER + ); +} +void nseel_asm_max_fp_end(void) {} + + + +// just generic functions left, yay + + + + +void _asm_generic3parm(void) +{ + __asm__( + FUNCTION_MARKER +#ifdef TARGET_X64 + +#ifdef AMD64ABI + + "movl %rsi, %r15\n" + "movl %rdi, %rdx\n" // third parameter = parm + "movl $0xfefefefe, %rdi\n" // first parameter= context + + "movl %ecx, %rsi\n" // second parameter = parm + "movl %rax, %rcx\n" // fourth parameter = parm + "movl $0xfefefefe, %rax\n" // call function + "call *%rax\n" + + "movl %r15, %rsi\n" +#else + "movl %ecx, %edx\n" // second parameter = parm + "movl $0xfefefefe, %ecx\n" // first parameter= context + "movl %rdi, %r8\n" // third parameter = parm + "movl %rax, %r9\n" // fourth parameter = parm + "movl 
$0xfefefefe, %edi\n" // call function + "subl X64_EXTRA_STACK_SPACE, %rsp\n" + "call *%edi\n" + "addl X64_EXTRA_STACK_SPACE, %rsp\n" +#endif + +#else + + "movl $0xfefefefe, %edx\n" + "pushl %eax\n" // push parameter + "pushl %edi\n" // push parameter + "movl $0xfefefefe, %edi\n" + "pushl %ecx\n" // push parameter + "pushl %edx\n" // push context pointer + "call *%edi\n" + "addl $16, %esp\n" + +#endif + FUNCTION_MARKER + ); +} +void _asm_generic3parm_end(void) {} + + +void _asm_generic3parm_retd(void) +{ + __asm__( + FUNCTION_MARKER +#ifdef TARGET_X64 +#ifdef AMD64ABI + "movl %rsi, %r15\n" + "movl %rdi, %rdx\n" // third parameter = parm + "movl $0xfefefefe, %rdi\n" // first parameter= context + "movl %ecx, %rsi\n" // second parameter = parm + "movl %rax, %rcx\n" // fourth parameter = parm + "movl $0xfefefefe, %rax\n" // call function + "call *%rax\n" + "movl %r15, %rsi\n" + "movq xmm0, (%r15)\n" + "fldl (%r15)\n" +#else + "movl %ecx, %edx\n" // second parameter = parm + "movl $0xfefefefe, %ecx\n" // first parameter= context + "movl %rdi, %r8\n" // third parameter = parm + "movl %rax, %r9\n" // fourth parameter = parm + "movl $0xfefefefe, %edi\n" // call function + "subl X64_EXTRA_STACK_SPACE, %rsp\n" + "call *%edi\n" + "addl X64_EXTRA_STACK_SPACE, %rsp\n" + "movq xmm0, (%rsi)\n" + "fldl (%rsi)\n" +#endif +#else + + "subl $16, %esp\n" + "movl $0xfefefefe, %edx\n" + "movl %edi, 8(%esp)\n" + "movl $0xfefefefe, %edi\n" + "movl %eax, 12(%esp)\n" + "movl %ecx, 4(%esp)\n" + "movl %edx, (%esp)\n" + "call *%edi\n" + "addl $16, %esp\n" + +#endif + FUNCTION_MARKER + ); +} +void _asm_generic3parm_retd_end(void) {} + + +void _asm_generic2parm(void) // this prob neds to be fixed for ppc +{ + __asm__( + FUNCTION_MARKER +#ifdef TARGET_X64 + +#ifdef AMD64ABI + "movl %rsi, %r15\n" + "movl %edi, %esi\n" // second parameter = parm + "movl $0xfefefefe, %edi\n" // first parameter= context + "movl %rax, %rdx\n" // third parameter = parm + "movl $0xfefefefe, %rcx\n" // call function + "call *%rcx\n" + "movl %r15, %rsi\n" +#else + "movl $0xfefefefe, %ecx\n" // first parameter= context + "movl %edi, %edx\n" // second parameter = parm + "movl %rax, %r8\n" // third parameter = parm + "movl $0xfefefefe, %edi\n" // call function + "subl X64_EXTRA_STACK_SPACE, %rsp\n" + "call *%edi\n" + "addl X64_EXTRA_STACK_SPACE, %rsp\n" +#endif +#else + + "movl $0xfefefefe, %edx\n" + "movl $0xfefefefe, %ecx\n" + "subl $4, %esp\n" // keep stack aligned + "pushl %eax\n" // push parameter + "pushl %edi\n" // push parameter + "pushl %edx\n" // push context pointer + "call *%ecx\n" + "addl $16, %esp\n" + +#endif + FUNCTION_MARKER + ); +} +void _asm_generic2parm_end(void) {} + + +void _asm_generic2parm_retd(void) +{ + __asm__( + FUNCTION_MARKER +#ifdef TARGET_X64 +#ifdef AMD64ABI + "movl %rsi, %r15\n" + "movl %rdi, %rsi\n" // second parameter = parm + "movl $0xfefefefe, %rdi\n" // first parameter= context + "movl $0xfefefefe, %rcx\n" // call function + "movl %rax, %rdx\n" // third parameter = parm + "call *%rcx\n" + "movl %r15, %rsi\n" + "movq xmm0, (%r15)\n" + "fldl (%r15)\n" +#else + "movl %rdi, %rdx\n" // second parameter = parm + "movl $0xfefefefe, %rcx\n" // first parameter= context + "movl $0xfefefefe, %rdi\n" // call function + "movl %rax, %r8\n" // third parameter = parm + "subl X64_EXTRA_STACK_SPACE, %rsp\n" + "call *%edi\n" + "addl X64_EXTRA_STACK_SPACE, %rsp\n" + "movq xmm0, (%rsi)\n" + "fldl (%rsi)\n" +#endif +#else + + "subl $16, %esp\n" + "movl $0xfefefefe, %edx\n" + "movl $0xfefefefe, %ecx\n" + "movl %edx, (%esp)\n" + "movl 
%edi, 4(%esp)\n" + "movl %eax, 8(%esp)\n" + "call *%ecx\n" + "addl $16, %esp\n" + +#endif + FUNCTION_MARKER + ); +} +void _asm_generic2parm_retd_end(void) {} + + + + + +void _asm_generic1parm(void) +{ + __asm__( + FUNCTION_MARKER +#ifdef TARGET_X64 +#ifdef AMD64ABI + "movl $0xfefefefe, %rdi\n" // first parameter= context + "movl %rsi, %r15\n" + "movl %eax, %rsi\n" // second parameter = parm + "movl $0xfefefefe, %rcx\n" // call function + "call *%rcx\n" + "movl %r15, %rsi\n" +#else + "movl $0xfefefefe, %ecx\n" // first parameter= context + "movl %eax, %edx\n" // second parameter = parm + "movl $0xfefefefe, %edi\n" // call function + "subl X64_EXTRA_STACK_SPACE, %rsp\n" + "call *%edi\n" + "addl X64_EXTRA_STACK_SPACE, %rsp\n" +#endif +#else + + "movl $0xfefefefe, %edx\n" + "subl $8, %esp\n" // keep stack aligned + "movl $0xfefefefe, %ecx\n" + "pushl %eax\n" // push parameter + "pushl %edx\n" // push context pointer + "call *%ecx\n" + "addl $16, %esp\n" + +#endif + + FUNCTION_MARKER + ); +} +void _asm_generic1parm_end(void) {} + + +void _asm_generic1parm_retd(void) // 1 parameter returning double +{ + __asm__( + FUNCTION_MARKER +#ifdef TARGET_X64 +#ifdef AMD64ABI + "movl $0xfefefefe, %rdi\n" // first parameter = context pointer + "movl $0xfefefefe, %rcx\n" // function address + "movl %rsi, %r15\n" // save rsi + "movl %rax, %rsi\n" // second parameter = parameter + + "call *%rcx\n" + + "movl %r15, %rsi\n" + "movq xmm0, (%r15)\n" + "fldl (%r15)\n" +#else + "movl $0xfefefefe, %ecx\n" // first parameter= context + "movl $0xfefefefe, %edi\n" // call function + + "movl %rax, %rdx\n" // second parameter = parm + + "subl X64_EXTRA_STACK_SPACE, %rsp\n" + "call *%edi\n" + "addl X64_EXTRA_STACK_SPACE, %rsp\n" + "movq xmm0, (%rsi)\n" + "fldl (%rsi)\n" +#endif +#else + + "movl $0xfefefefe, %edx\n" // context pointer + "movl $0xfefefefe, %ecx\n" // func-addr + "subl $16, %esp\n" + "movl %eax, 4(%esp)\n" // push parameter + "movl %edx, (%esp)\n" // push context pointer + "call *%ecx\n" + "addl $16, %esp\n" + +#endif + FUNCTION_MARKER + ); +} +void _asm_generic1parm_retd_end(void) {} + + + + + +// this gets its own stub because it's pretty crucial for performance :/ + +void _asm_megabuf(void) +{ + __asm__( + + FUNCTION_MARKER + +#ifdef TARGET_X64 + + +#ifdef AMD64ABI + + "fadd" EEL_F_SUFFIX " -8(%r12)\n" + + "fistpl (%rsi)\n" + + // check if (%rsi) is in range, and buffer available, otherwise call function + "movl (%rsi), %edx\n" + "cmpl %1, %rdx\n" //REPLACE=((NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK)) + "jae 0f\n" + "movll %rdx, %rax\n" + "shrll %2, %rax\n" //REPLACE=(NSEEL_RAM_ITEMSPERBLOCK_LOG2 - 3/*log2(sizeof(void *))*/ ) + "andll %3, %rax\n" //REPLACE=((NSEEL_RAM_BLOCKS-1)*8 /*sizeof(void*)*/ ) + "movll (%r12, %rax), %rax\n" + "testl %rax, %rax\n" + "jnz 1f\n" + "0:\n" + "movl $0xfefefefe, %rax\n" + "movl %r12, %rdi\n" // set first parm to ctx + "movl %rsi, %r15\n" // save rsi + "movl %rdx, %esi\n" // esi becomes second parameter (edi is first, context pointer) + "call *%rax\n" + "movl %r15, %rsi\n" // restore rsi + "jmp 2f\n" + "1:\n" + "andll %4, %rdx\n" //REPLACE=(NSEEL_RAM_ITEMSPERBLOCK-1) + "shlll $3, %rdx\n" // 3 is log2(sizeof(EEL_F)) + "addll %rdx, %rax\n" + "2:\n" + +#else + + "fadd" EEL_F_SUFFIX " -8(%r12)\n" + + "fistpl (%rsi)\n" + + // check if (%rsi) is in range... 
+ "movl (%rsi), %edi\n" + "cmpl %1, %edi\n" //REPLACE=((NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK)) + "jae 0f\n" + "movll %rdi, %rax\n" + "shrll %2, %rax\n" //REPLACE=(NSEEL_RAM_ITEMSPERBLOCK_LOG2 - 3/*log2(sizeof(void *))*/ ) + "andll %3, %rax\n" //REPLACE=((NSEEL_RAM_BLOCKS-1)*8 /*sizeof(void*)*/ ) + "movll (%r12, %rax), %rax\n" + "testl %rax, %rax\n" + "jnz 1f\n" + "0:\n" + "movl $0xfefefefe, %rax\n" // function ptr + "movl %r12, %rcx\n" // set first parm to ctx + "movl %rdi, %rdx\n" // rdx is second parameter (rcx is first) + "subl X64_EXTRA_STACK_SPACE, %rsp\n" + "call *%rax\n" + "addl X64_EXTRA_STACK_SPACE, %rsp\n" + "jmp 2f\n" + "1:\n" + "andll %4, %rdi\n" //REPLACE=(NSEEL_RAM_ITEMSPERBLOCK-1) + "shlll $3, %rdi\n" // 3 is log2(sizeof(EEL_F)) + "addll %rdi, %rax\n" + "2:\n" +#endif + + + FUNCTION_MARKER +#else + "fadd" EEL_F_SUFFIX " -8(%%ebx)\n" + "fistpl (%%esi)\n" + + // check if (%esi) is in range, and buffer available, otherwise call function + "movl (%%esi), %%edi\n" + "cmpl %0, %%edi\n" //REPLACE=((NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK)) + "jae 0f\n" + + "movl %%edi, %%eax\n" + "shrl %1, %%eax\n" //REPLACE=(NSEEL_RAM_ITEMSPERBLOCK_LOG2 - 2/*log2(sizeof(void *))*/ ) + "andl %2, %%eax\n" //REPLACE=((NSEEL_RAM_BLOCKS-1)*4 /*sizeof(void*)*/ ) + "movl (%%ebx, %%eax), %%eax\n" + "testl %%eax, %%eax\n" + "jnz 1f\n" + "0:\n" + "subl $8, %%esp\n" // keep stack aligned + "movl $0xfefefefe, %%ecx\n" + "pushl %%edi\n" // parameter + "pushl %%ebx\n" // push context pointer + "call *%%ecx\n" + "addl $16, %%esp\n" + "jmp 2f\n" + "1:\n" + "andl %3, %%edi\n" //REPLACE=(NSEEL_RAM_ITEMSPERBLOCK-1) + "shll $3, %%edi\n" // 3 is log2(sizeof(EEL_F)) + "addl %%edi, %%eax\n" + "2:" + FUNCTION_MARKER + + #ifndef _MSC_VER + :: "i" (((NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK))), + "i" ((NSEEL_RAM_ITEMSPERBLOCK_LOG2 - 2/*log2(sizeof(void *))*/ )), + "i" (((NSEEL_RAM_BLOCKS-1)*4 /*sizeof(void*)*/ )), + "i" ((NSEEL_RAM_ITEMSPERBLOCK-1 )) + #endif + + + +#endif + + ); +} + +void _asm_megabuf_end(void) {} + + +void _asm_gmegabuf(void) +{ + __asm__( + + FUNCTION_MARKER + +#ifdef TARGET_X64 + + +#ifdef AMD64ABI + + "movl %rsi, %r15\n" + "fadd" EEL_F_SUFFIX " -8(%r12)\n" + "movl $0xfefefefe, %rdi\n" // first parameter = context pointer + "fistpl (%rsi)\n" + "movl $0xfefefefe, %edx\n" + "movl (%rsi), %esi\n" + "call *%rdx\n" + "movl %r15, %rsi\n" + +#else + "fadd" EEL_F_SUFFIX " -8(%r12)\n" + "movl $0xfefefefe, %rcx\n" // first parameter = context pointer + "fistpl (%rsi)\n" + "movl $0xfefefefe, %rdi\n" + "movl (%rsi), %edx\n" + "subl X64_EXTRA_STACK_SPACE, %rsp\n" + "call *%rdi\n" + "addl X64_EXTRA_STACK_SPACE, %rsp\n" +#endif + + +#else + "subl $16, %esp\n" // keep stack aligned + "movl $0xfefefefe, (%esp)\n" + "fadd" EEL_F_SUFFIX " -8(%ebx)\n" + "movl $0xfefefefe, %edi\n" + "fistpl 4(%esp)\n" + "call *%edi\n" + "addl $16, %esp\n" + +#endif + + + + FUNCTION_MARKER + ); +} + +void _asm_gmegabuf_end(void) {} + +void nseel_asm_stack_push(void) +{ +#ifdef TARGET_X64 + __asm__( + FUNCTION_MARKER + "movl $0xfefefefe, %rdi\n" + "movll (%rax), %rcx\n" + "movll (%rdi), %rax\n" + "addll $8, %rax\n" + "movl $0xFEFEFEFE, %rdx\n" + "andll %rdx, %rax\n" + "movl $0xFEFEFEFE, %rdx\n" + "orll %rdx, %rax\n" + "movll %rcx, (%rax)\n" + "movll %rax, (%rdi)\n" + FUNCTION_MARKER + ); +#else + + __asm__( + FUNCTION_MARKER + "movl $0xfefefefe, %edi\n" + + "movl (%eax), %ecx\n" + "movl 4(%eax), %edx\n" + + "movl (%edi), %eax\n" + + "addl $8, %eax\n" + "andl $0xfefefefe, %eax\n" + "orl $0xfefefefe, %eax\n" + + "movl %ecx, (%eax)\n" + 
"movl %edx, 4(%eax)\n" + + "movl %eax, (%edi)\n" + FUNCTION_MARKER + ); + +#endif + +} +void nseel_asm_stack_push_end(void) {} + + + +void nseel_asm_stack_pop(void) +{ +#ifdef TARGET_X64 + + __asm__( + FUNCTION_MARKER + "movl $0xfefefefe, %rdi\n" + "movll (%rdi), %rcx\n" + "movq (%rcx), %xmm0\n" + "subll $8, %rcx\n" + "movl $0xFEFEFEFE, %rdx\n" + "andll %rdx, %rcx\n" + "movl $0xFEFEFEFE, %rdx\n" + "orll %rdx, %rcx\n" + "movll %rcx, (%rdi)\n" + "movq %xmm0, (%eax)\n" + FUNCTION_MARKER + ); + +#else + + __asm__( + FUNCTION_MARKER + "movl $0xfefefefe, %edi\n" + "movl (%edi), %ecx\n" + "fld" EEL_F_SUFFIX " (%ecx)\n" + "subl $8, %ecx\n" + "andl $0xfefefefe, %ecx\n" + "orl $0xfefefefe, %ecx\n" + "movl %ecx, (%edi)\n" + "fstp" EEL_F_SUFFIX " (%eax)\n" + FUNCTION_MARKER + ); + +#endif +} +void nseel_asm_stack_pop_end(void) {} + + +void nseel_asm_stack_pop_fast(void) +{ +#ifdef TARGET_X64 + + __asm__( + FUNCTION_MARKER + "movl $0xfefefefe, %rdi\n" + "movll (%rdi), %rcx\n" + "movll %rcx, %rax\n" + "subll $8, %rcx\n" + "movl $0xFEFEFEFE, %rdx\n" + "andll %rdx, %rcx\n" + "movl $0xFEFEFEFE, %rdx\n" + "orll %rdx, %rcx\n" + "movll %rcx, (%rdi)\n" + FUNCTION_MARKER + ); + +#else + + __asm__( + FUNCTION_MARKER + "movl $0xfefefefe, %edi\n" + "movl (%edi), %ecx\n" + "movl %ecx, %eax\n" + "subl $8, %ecx\n" + "andl $0xfefefefe, %ecx\n" + "orl $0xfefefefe, %ecx\n" + "movl %ecx, (%edi)\n" + FUNCTION_MARKER + ); + +#endif +} +void nseel_asm_stack_pop_fast_end(void) {} + +void nseel_asm_stack_peek_int(void) +{ +#ifdef TARGET_X64 + + __asm__( + FUNCTION_MARKER + "movll $0xfefefefe, %rdi\n" + "movll (%rdi), %rax\n" + "movl $0xfefefefe, %rdx\n" + "subll %rdx, %rax\n" + "movl $0xFEFEFEFE, %rdx\n" + "andll %rdx, %rax\n" + "movl $0xFEFEFEFE, %rdx\n" + "orll %rdx, %rax\n" + FUNCTION_MARKER + ); + +#else + + __asm__( + FUNCTION_MARKER + "movl $0xfefefefe, %edi\n" + "movl (%edi), %eax\n" + "movl $0xfefefefe, %edx\n" + "subl %edx, %eax\n" + "andl $0xfefefefe, %eax\n" + "orl $0xfefefefe, %eax\n" + FUNCTION_MARKER + ); + +#endif + +} +void nseel_asm_stack_peek_int_end(void) {} + + + +void nseel_asm_stack_peek(void) +{ +#ifdef TARGET_X64 + + __asm__( + FUNCTION_MARKER + "movll $0xfefefefe, %rdi\n" + "fistpl (%rsi)\n" + "movll (%rdi), %rax\n" + "movll (%rsi), %rdx\n" + "shll $3, %rdx\n" // log2(sizeof(EEL_F)) + "subl %rdx, %rax\n" + "movl $0xFEFEFEFE, %rdx\n" + "andll %rdx, %rax\n" + "movl $0xFEFEFEFE, %rdx\n" + "orll %rdx, %rax\n" + FUNCTION_MARKER + ); + +#else + + __asm__( + FUNCTION_MARKER + "movl $0xfefefefe, %edi\n" + "fistpl (%esi)\n" + "movl (%edi), %eax\n" + "movl (%esi), %edx\n" + "shll $3, %edx\n" // log2(sizeof(EEL_F)) + "subl %edx, %eax\n" + "andl $0xfefefefe, %eax\n" + "orl $0xfefefefe, %eax\n" + FUNCTION_MARKER + ); + +#endif + +} +void nseel_asm_stack_peek_end(void) {} + + +void nseel_asm_stack_peek_top(void) +{ +#ifdef TARGET_X64 + + __asm__( + FUNCTION_MARKER + "movll $0xfefefefe, %rdi\n" + "movll (%rdi), %rax\n" + FUNCTION_MARKER + ); + +#else + + __asm__( + FUNCTION_MARKER + "movl $0xfefefefe, %edi\n" + "movl (%edi), %eax\n" + FUNCTION_MARKER + ); + +#endif + +} +void nseel_asm_stack_peek_top_end(void) {} + +void nseel_asm_stack_exch(void) +{ +#ifdef TARGET_X64 + + __asm__( + FUNCTION_MARKER + "movll $0xfefefefe, %rdi\n" + "movll (%rdi), %rcx\n" + "movq (%rcx), %xmm0\n" + "movq (%rax), %xmm1\n" + "movq %xmm0, (%rax)\n" + "movq %xmm1, (%rcx)\n" + FUNCTION_MARKER + ); + +#else + + __asm__( + FUNCTION_MARKER + "movl $0xfefefefe, %edi\n" + "movl (%edi), %ecx\n" + "fld" EEL_F_SUFFIX " (%ecx)\n" + "fld" 
EEL_F_SUFFIX " (%eax)\n" + "fstp" EEL_F_SUFFIX " (%ecx)\n" + "fstp" EEL_F_SUFFIX " (%eax)\n" + FUNCTION_MARKER + ); + +#endif + +} +void nseel_asm_stack_exch_end(void) {} + +#ifdef TARGET_X64 +void eel_callcode64() +{ + __asm__( +#ifndef EEL_X64_NO_CHANGE_FPFLAGS + "subl $16, %rsp\n" + "fnstcw (%rsp)\n" + "mov (%rsp), %ax\n" + "or $0xE3F, %ax\n" // 53 or 64 bit precision, trunc, and masking all exceptions + "mov %ax, 4(%rsp)\n" + "fldcw 4(%rsp)\n" +#endif + "push %rbx\n" + "push %rbp\n" + "push %r12\n" + "push %r13\n" + "push %r14\n" + "push %r15\n" + +#ifdef AMD64ABI + "movll %rsi, %r12\n" // second parameter is ram-blocks pointer + "call %rdi\n" +#else + "push %rdi\n" + "push %rsi\n" + "movll %rdx, %r12\n" // second parameter is ram-blocks pointer + "call %rcx\n" + "pop %rsi\n" + "pop %rdi\n" +#endif + + "fclex\n" + + "pop %r15\n" + "pop %r14\n" + "pop %r13\n" + "pop %r12\n" + "pop %rbp\n" + "pop %rbx\n" + +#ifndef EEL_X64_NO_CHANGE_FPFLAGS + "fldcw (%rsp)\n" + "addl $16, %rsp\n" +#endif + + "ret\n" + ); +} + +void eel_callcode64_fast() +{ + __asm__( + "push %rbx\n" + "push %rbp\n" + "push %r12\n" + "push %r13\n" + "push %r14\n" + "push %r15\n" + +#ifdef AMD64ABI + "movll %rsi, %r12\n" // second parameter is ram-blocks pointer + "call %rdi\n" +#else + "push %rdi\n" + "push %rsi\n" + "movll %rdx, %r12\n" // second parameter is ram-blocks pointer + "call %rcx\n" + "pop %rsi\n" + "pop %rdi\n" +#endif + + "pop %r15\n" + "pop %r14\n" + "pop %r13\n" + "pop %r12\n" + "pop %rbp\n" + "pop %rbx\n" + + "ret\n" + ); +} + +void eel_setfp_round() +{ + __asm__( +#ifndef EEL_X64_NO_CHANGE_FPFLAGS + "subl $16, %rsp\n" + "fnstcw (%rsp)\n" + "mov (%rsp), %ax\n" + "and $0xF3FF, %ax\n" // set round to nearest + "mov %ax, 4(%rsp)\n" + "fldcw 4(%rsp)\n" + "addl $16, %rsp\n" +#endif + "ret\n" + ); +} + +void eel_setfp_trunc() +{ + __asm__( +#ifndef EEL_X64_NO_CHANGE_FPFLAGS + "subl $16, %rsp\n" + "fnstcw (%rsp)\n" + "mov (%rsp), %ax\n" + "or $0xC00, %ax\n" // set to truncate + "mov %ax, 4(%rsp)\n" + "fldcw 4(%rsp)\n" + "addl $16, %rsp\n" +#endif + "ret\n" + ); +} + +void eel_enterfp(int s[2]) +{ + __asm__( +#ifdef AMD64ABI + "fnstcw (%rdi)\n" + "mov (%rdi), %ax\n" + "or $0xE3F, %ax\n" // 53 or 64 bit precision, trunc, and masking all exceptions + "mov %ax, 4(%rdi)\n" + "fldcw 4(%rdi)\n" +#else + "fnstcw (%rcx)\n" + "mov (%rcx), %ax\n" + "or $0xE3F, %ax\n" // 53 or 64 bit precision, trunc, and masking all exceptions + "mov %ax, 4(%rcx)\n" + "fldcw 4(%rcx)\n" +#endif + "ret\n" + ); +} +void eel_leavefp(int s[2]) +{ + __asm__( +#ifdef AMD64ABI + "fldcw (%rdi)\n" +#else + "fldcw (%rcx)\n" +#endif + "ret\n"; + ); +} + +#endif |