option casemap:none

.CODE

; Import Coroutine_Finish by its mangled Microsoft Visual C++ name.
?Coroutine_Finish@@YAXXZ PROTO

;-----------------------------------------------------------------------
; extern "C" void SaveNonVolatileRegs( uintptr_t regs[8] );
; ABI:   Microsoft x64
; In:    rcx = regs (receives eight qwords)
; Out:   regs[0..7] = rbx, rbp, rsi, rdi, r12, r13, r14, r15
; Clobb: none
;-----------------------------------------------------------------------
SaveNonVolatileRegs PROC FRAME
        .endprolog

        mov     qword ptr [rcx],      rbx
        mov     qword ptr [rcx + 8],  rbp
        mov     qword ptr [rcx + 16], rsi
        mov     qword ptr [rcx + 24], rdi
        mov     qword ptr [rcx + 32], r12
        mov     qword ptr [rcx + 40], r13
        mov     qword ptr [rcx + 48], r14
        mov     qword ptr [rcx + 56], r15
        ret
SaveNonVolatileRegs ENDP

; Stack frame layout of Coroutine_Launch_ASM, from rsp upward after the prolog.
LAUNCH_CALL_AREA_BYTES  EQU 28h         ; 32 bytes of callee shadow space + 8 bytes of alignment padding
LAUNCH_SAVED_REG_BYTES  EQU 40h         ; eight pushed nonvolatile registers
LAUNCH_RETADDR_BYTES    EQU 8           ; return address pushed by our caller

;-----------------------------------------------------------------------
; extern "C" void NORETURN Coroutine_Launch_ASM( byte **ppStackHigh,
;                                                uintptr_t **ppLaunchParentFramePtr,
;                                                void (*pfnExec)( void* ),
;                                                void *pvParam )
; ABI:   Microsoft x64
; In:    rcx = ppStackHigh, rdx = ppLaunchParentFramePtr,
;        r8  = pfnExec,     r9  = pvParam
;        rsp is half-aligned on entry (rsp mod 16 == 8) due to the return address.
; Out:   *ppStackHigh            = address just above our return address
;        *ppLaunchParentFramePtr = address of the saved-register block on our stack
;        Does not return: calls pfnExec( pvParam ), then Coroutine_Finish.
;-----------------------------------------------------------------------
Coroutine_Launch_ASM PROC FRAME
        ; x64 prolog and prolog description macros:
        ; save caller's nonvolatile registers (pushed in reverse order to match SaveNonVolatileRegs)
        ; so that we can slam new values in later to trick the x64 callstack unwind procedure
        push    r15
        .pushreg r15
        push    r14
        .pushreg r14
        push    r13
        .pushreg r13
        push    r12
        .pushreg r12
        push    rdi
        .pushreg rdi
        push    rsi
        .pushreg rsi
        push    rbp
        .pushreg rbp
        push    rbx
        .pushreg rbx

        ; stack-allocate Win64 function call shadow space for calls to pfnExec and Coroutine_Finish,
        ; plus 8 additional bytes to align the stack frame properly (comes in off by 8)
        sub     rsp, LAUNCH_CALL_AREA_BYTES
        .allocstack LAUNCH_CALL_AREA_BYTES
        .endprolog

        ; compute top of stack for coroutine: 40 bytes for stack, 64 for saved regs, 8 for return address
        ; (we do not bother including the additional unused 32 byte shadow space we own above that)
        lea     rax, [rsp + LAUNCH_CALL_AREA_BYTES + LAUNCH_SAVED_REG_BYTES + LAUNCH_RETADDR_BYTES]
        mov     qword ptr [rcx], rax

        ; save off the address of our saved regs so that we can memcpy over them later and trick
        ; the x64 stack unwind logic into walking up to a different Internal_Coroutine_Continue
        lea     rax, [rsp + LAUNCH_CALL_AREA_BYTES]
        mov     qword ptr [rdx], rax

        ; call pfnExec(pvParam)
        mov     rcx, r9
        call    r8

        ; call Coroutine_Finish - does not return
        call    ?Coroutine_Finish@@YAXXZ

        ; Never executed. Without an instruction here, the return address pushed by the call
        ; above would be the first byte past this procedure, and an unwind through
        ; Coroutine_Finish would not find this procedure's unwind data.
        int     3
Coroutine_Launch_ASM ENDP
; Needs to match definition found in setjmp.h
; Field offsets in the trailing comments follow from the field sizes and agree with the
; displacements used in the reference __intrinsic_setjmp() listing below.
_JUMP_BUFFER STRUCT
    m_Frame QWORD ?         ; 00h
    m_Rbx QWORD ?           ; 08h
    m_Rsp QWORD ?           ; 10h
    m_Rbp QWORD ?           ; 18h
    m_Rsi QWORD ?           ; 20h
    m_Rdi QWORD ?           ; 28h
    m_R12 QWORD ?           ; 30h
    m_R13 QWORD ?           ; 38h
    m_R14 QWORD ?           ; 40h
    m_R15 QWORD ?           ; 48h
    m_Rip QWORD ?           ; 50h
    m_MxCsr DWORD ?         ; 58h
    m_FpCsr WORD ?          ; 5Ch
    m_Spare WORD ?          ; 5Eh
    m_Xmm6 XMMWORD ?        ; 60h
    m_Xmm7 XMMWORD ?        ; 70h
    m_Xmm8 XMMWORD ?        ; 80h
    m_Xmm9 XMMWORD ?        ; 90h
    m_Xmm10 XMMWORD ?       ; 0A0h
    m_Xmm11 XMMWORD ?       ; 0B0h
    m_Xmm12 XMMWORD ?       ; 0C0h
    m_Xmm13 XMMWORD ?       ; 0D0h
    m_Xmm14 XMMWORD ?       ; 0E0h
    m_Xmm15 XMMWORD ?       ; 0F0h
_JUMP_BUFFER ENDS

;This is the reference asm for __intrinsic_setjmp() in VS2015
;mov qword ptr [rcx],rdx ; intrinsic call site does "mov rdx,rbp" followed by "add rdx,0FFFFFFFFFFFFFFC0h", looks like a nonstandard abi
;mov qword ptr [rcx+8],rbx
;mov qword ptr [rcx+18h],rbp
;mov qword ptr [rcx+20h],rsi
;mov qword ptr [rcx+28h],rdi
;mov qword ptr [rcx+30h],r12
;mov qword ptr [rcx+38h],r13
;mov qword ptr [rcx+40h],r14
;mov qword ptr [rcx+48h],r15
;lea r8,[rsp+8] ; rsp set to post-return address
;mov qword ptr [rcx+10h],r8
;mov r8,qword ptr [rsp]
;mov qword ptr [rcx+50h],r8
;stmxcsr dword ptr [rcx+58h]
;fnstcw word ptr [rcx+5Ch]
;movdqa xmmword ptr [rcx+60h],xmm6
;movdqa xmmword ptr [rcx+70h],xmm7
;movdqa xmmword ptr [rcx+80h],xmm8
;movdqa xmmword ptr [rcx+90h],xmm9
;movdqa xmmword ptr [rcx+0A0h],xmm10
;movdqa xmmword ptr [rcx+0B0h],xmm11
;movdqa xmmword ptr [rcx+0C0h],xmm12
;movdqa xmmword ptr [rcx+0D0h],xmm13
;movdqa xmmword ptr [rcx+0E0h],xmm14
;movdqa xmmword ptr [rcx+0F0h],xmm15
;xor eax,eax
;ret

; extern "C" void NORETURN Coroutine_LongJmp_Unchecked( jmp_buf buf, int nResult )
; Per Win64 ABI, incoming params are rcx, rdx, r8, r9.
; The initial stack pointer is half-aligned (rsp mod 16 == 8) due to the return address.
;
;-----------------------------------------------------------------------
; Coroutine_LongJmp_Unchecked
; ABI:   Microsoft x64
; In:    rcx = address of a _JUMP_BUFFER
;        edx = nResult; copied to the result register as-is, no validation or adjustment
; Out:   does not return to its caller. Control continues at m_Rip with
;        rax = zero-extended nResult, and with rdx, rbx, rsp, rbp, rsi, rdi, r12-r15,
;        MXCSR, the x87 control word and xmm6-xmm15 loaded from the buffer.
; Clobb: r10 (holds the jump target)
; Note:  the xmm fields are loaded with movaps, so the buffer must be 16-byte aligned.
;-----------------------------------------------------------------------
Coroutine_LongJmp_Unchecked PROC
        ; load nResult into result from initial setjmp();
        ; the 32-bit write zero-extends into rax, so no separate clear is needed
        mov     eax, edx

        ; restore to setjmp() caller state
        ; m_Frame appears to be an error checking value of (_JUMP_BUFFER.m_Rbp + 0FFFFFFFFFFFFFFC0h)
        ; passed non-standardly through rdx to setjmp()
        mov     rdx, [rcx]._JUMP_BUFFER.m_Frame
        mov     rbx, [rcx]._JUMP_BUFFER.m_Rbx
        mov     rsp, [rcx]._JUMP_BUFFER.m_Rsp
        mov     rbp, [rcx]._JUMP_BUFFER.m_Rbp
        mov     rsi, [rcx]._JUMP_BUFFER.m_Rsi
        mov     rdi, [rcx]._JUMP_BUFFER.m_Rdi
        mov     r12, [rcx]._JUMP_BUFFER.m_R12
        mov     r13, [rcx]._JUMP_BUFFER.m_R13
        mov     r14, [rcx]._JUMP_BUFFER.m_R14
        mov     r15, [rcx]._JUMP_BUFFER.m_R15
        mov     r10, [rcx]._JUMP_BUFFER.m_Rip       ; store return address in r10 for return

        ; floating point control state (m_Spare is not restored)
        ldmxcsr [rcx]._JUMP_BUFFER.m_MxCsr
        fldcw   [rcx]._JUMP_BUFFER.m_FpCsr

        ; nonvolatile xmm registers
        movaps  xmm6,  [rcx]._JUMP_BUFFER.m_Xmm6
        movaps  xmm7,  [rcx]._JUMP_BUFFER.m_Xmm7
        movaps  xmm8,  [rcx]._JUMP_BUFFER.m_Xmm8
        movaps  xmm9,  [rcx]._JUMP_BUFFER.m_Xmm9
        movaps  xmm10, [rcx]._JUMP_BUFFER.m_Xmm10
        movaps  xmm11, [rcx]._JUMP_BUFFER.m_Xmm11
        movaps  xmm12, [rcx]._JUMP_BUFFER.m_Xmm12
        movaps  xmm13, [rcx]._JUMP_BUFFER.m_Xmm13
        movaps  xmm14, [rcx]._JUMP_BUFFER.m_Xmm14
        movaps  xmm15, [rcx]._JUMP_BUFFER.m_Xmm15

        ; jmp instead of ret to _JUMP_BUFFER.m_Rip because setjmp() already set the
        ; _JUMP_BUFFER.m_Rsp to the post-return state.
        ; No hand-emitted REX byte: a near indirect jmp is always 64-bit in long mode, and the
        ; assembler already emits the REX prefix needed to encode r10.
        jmp     r10
Coroutine_LongJmp_Unchecked ENDP

_TEXT ENDS
END