fx.reflection: optimise linux dynamic function calling

This commit is contained in:
2026-05-04 19:37:44 +01:00
parent bebc67537d
commit bf251dd818
+35 -117
View File
@@ -1,3 +1,6 @@
.extern memcpy
.type memcpy, @function
.global callvm_invoke_i .global callvm_invoke_i
.type callvm_invoke_i, @function .type callvm_invoke_i, @function
@@ -14,147 +17,62 @@ callvm_invoke_i:
push %r14 push %r14
push %r15 push %r15
# move our parameters out of the way
mov %rdi, %r11 mov %rdi, %r11
mov %rsi, %r12 mov %rsi, %r12
# First, set up the fixed arguments
# %r13 = fixed arg count
movq (%r12), %r13
# arg[0]
cmp $0, %r13
jle .idone
movq 48(%r12), %rdi
# arg[1]
cmp $1, %r13
jle .idone
movq 56(%r12), %rsi
# arg[2]
cmp $2, %r13
jle .idone
movq 64(%r12), %rdx
# arg[3]
cmp $3, %r13
jle .idone
movq 72(%r12), %rcx
# arg[4]
cmp $4, %r13
jle .idone
movq 80(%r12), %r8
# arg[5]
cmp $5, %r13
jle .idone
movq 88(%r12), %r9
.idone:
# Next, set up the fixed double arguments
# r13 = fixed double arg count
movq 8(%r12), %r13
# arg[0]
cmp $0, %r13
jle .vdone
movq 96(%r12), %xmm0
# arg[1]
cmp $1, %r13
jle .vdone
movq 104(%r12), %xmm1
# arg[2]
cmp $2, %r13
jle .vdone
movq 112(%r12), %xmm2
# arg[3]
cmp $3, %r13
jle .vdone
movq 120(%r12), %xmm3
# arg[4]
cmp $4, %r13
jle .vdone
movq 128(%r12), %xmm4
# arg[5]
cmp $5, %r13
jle .vdone
movq 136(%r12), %xmm5
# arg[6]
cmp $6, %r13
jle .vdone
movq 144(%r12), %xmm6
# arg[7]
cmp $7, %r13
jle .vdone
movq 152(%r12), %xmm7
.vdone:
# Finally, set up the variable arguments
# %r13 = excess arg count
movq 32(%r12), %r13
# calculate the amount of stack space needed for the varargs # calculate the amount of stack space needed for the varargs
movq 32(%r12), %r13
shl $3, %r13 shl $3, %r13
andq $0xFFFFFFFFFFFFFFF0, %r13
addq $0x10, %r13
# allocate the stack space # allocate the stack space
push %rsp
sub %r13, %rsp sub %r13, %rsp
mov %rsp, %r14 andq $0xFFFFFFFFFFFFFFF0, %rsp # re-align the stack
# convert buffer size back to number of arguments # copy the excess args to the stack
movq 32(%r12), %r13 mov %rsp, %rdi
mov 160(%r12), %rsi
mov %r13, %rdx
call memcpy
# r13: number of var args (decrements with every iteration) # Next, set up the fixed integer arguments
# r14: var arg dest pointer (increments with every iteration)
# r15: arg src buffer (increments with every iteration)
mov 160(%r12), %r15
.loop: movq 48(%r12), %rdi # int arg 0
cmp $0, %r13 movq 56(%r12), %rsi # int arg 1
je .loop_end movq 64(%r12), %rdx # int arg 2
movq 72(%r12), %rcx # int arg 3
movq 80(%r12), %r8 # int arg 4
movq 88(%r12), %r9 # int arg 5
# read the arg value from the src pointer # Finally, set up the fixed double arguments
mov (%r15), %rax
# write it to the stack, and increment the dest pointer movq 96(%r12), %xmm0 # double arg 0
mov %rax, (%r14) movq 104(%r12), %xmm1 # double arg 1
add $8, %r14 movq 112(%r12), %xmm2 # double arg 2
movq 120(%r12), %xmm3 # double arg 3
movq 128(%r12), %xmm4 # double arg 4
movq 136(%r12), %xmm5 # double arg 5
movq 144(%r12), %xmm6 # double arg 6
movq 152(%r12), %xmm7 # double arg 7
# increment the src pointer, decrement the arg count # set the number of vararg double parameters
sub $1, %r13 # as required by the ABI
add $8, %r15 mov 168(%r12), %rax
jmp .loop
.loop_end:
# call the function implementation # call the function implementation
mov 168(%r12), %rax
call *%r11 call *%r11
# de-allocate the stack varargs buffer (the size is recomputed into %r13 below) # Restore the stack pointer (deallocating the varargs buffer)
movq 32(%r12), %r13 mov -40(%rbp), %rsp
shl $3, %r13
andq $0xFFFFFFFFFFFFFFF0, %r13
addq $0x10, %r13
addq %r13, %rsp
# Restore callee-saved registers
pop %r15 pop %r15
pop %r14 pop %r14
pop %r13 pop %r13
pop %r12 pop %r12
# restore the saved stack frame and link pointer # restore the saved stack frame
pop %rbp pop %rbp
ret ret