@@ -63,81 +63,69 @@ pub unsafe fn init(
6363 #[ cfg( not( target_vendor = "apple" ) ) ]
6464 #[ naked]
6565 unsafe extern "C" fn trampoline_1 ( ) {
66- llvm_asm ! (
67- r#"
68- # gdb has a hardcoded check that rejects backtraces where frame addresses
69- # do not monotonically decrease. It is turned off if the function is called
70- # "__morestack" and that is hardcoded. So, to make gdb backtraces match
71- # the actual unwinder behavior, we call ourselves "__morestack" and mark
72- # the symbol as local; it shouldn't interfere with anything.
73- __morestack:
74- .local __morestack
75-
76- # Set up the first part of our DWARF CFI linking stacks together. When
77- # we reach this function from unwinding, %rbp will be pointing at the bottom
78- # of the parent linked stack. This link is set each time swap() is called.
79- # When unwinding the frame corresponding to this function, a DWARF unwinder
80- # will use %rbp+16 as the next call frame address, restore return address
81- # from CFA-8 and restore %rbp from CFA-16. This mirrors what the second half
82- # of `swap_trampoline` does.
83- .cfi_def_cfa %rbp, 16
84- .cfi_offset %rbp, -16
85-
86- # This nop is here so that the initial swap doesn't return to the start
87- # of the trampoline, which confuses the unwinder since it will look for
88- # frame information in the previous symbol rather than this one. It is
89- # never actually executed.
90- nop
91-
92- # Stack unwinding in some versions of libunwind doesn't seem to like
93- # 1-byte symbols, so we add a second nop here. This instruction isn't
94- # executed either, it is only here to pad the symbol size.
95- nop
96-
97- .Lend:
98- .size __morestack, .Lend-__morestack
99- "#
100- : : : : "volatile" )
66+ asm ! (
67+ // gdb has a hardcoded check that rejects backtraces where frame addresses
68+ // do not monotonically decrease. It is turned off if the function is called
69+ // "__morestack" and that is hardcoded. So, to make gdb backtraces match
70+ // the actual unwinder behavior, we call ourselves "__morestack" and mark
71+ // the symbol as local; it shouldn't interfere with anything.
72+ "__morestack:" ,
73+ ".local __morestack" ,
74+ // Set up the first part of our DWARF CFI linking stacks together. When
75+ // we reach this function from unwinding, %rbp will be pointing at the bottom
76+ // of the parent linked stack. This link is set each time swap() is called.
77+ // When unwinding the frame corresponding to this function, a DWARF unwinder
78+ // will use %rbp+16 as the next call frame address, restore return address
79+ // from CFA-8 and restore %rbp from CFA-16. This mirrors what the second half
80+ // of `swap_trampoline` does.
81+ ".cfi_def_cfa rbp, 16" ,
82+ ".cfi_offset rbp, -16" ,
83+ // This nop is here so that the initial swap doesn't return to the start
84+ // of the trampoline, which confuses the unwinder since it will look for
85+ // frame information in the previous symbol rather than this one. It is
86+ // never actually executed.
87+ "nop" ,
88+ // Stack unwinding in some versions of libunwind doesn't seem to like
89+ // 1-byte symbols, so we add a second nop here. This instruction isn't
90+ // executed either, it is only here to pad the symbol size.
91+ "nop" ,
92+ ".Lend:" ,
93+ ".size __morestack, .Lend-__morestack" ,
94+ ) ;
10195 }
10296
#[cfg(target_vendor = "apple")]
#[naked]
unsafe extern "C" fn trampoline_1() {
    asm!(
        // Identical to the above, except avoids .local/.size that aren't available on Mach-O.
        "__morestack:",
        ".private_extern __morestack",
        ".cfi_def_cfa rbp, 16",
        ".cfi_offset rbp, -16",
        // Never-executed padding nops; see the ELF variant for the rationale.
        "nop",
        "nop",
        // `asm!` inside a `#[naked]` function must be the sole statement and be
        // marked `options(noreturn)`.
        options(noreturn)
    );
}
118110
119111 #[ naked]
120112 unsafe extern "C" fn trampoline_2 ( ) {
121- llvm_asm ! (
122- r#"
123- # Set up the second part of our DWARF CFI.
124- # When unwinding the frame corresponding to this function, a DWARF unwinder
125- # will restore %rbp (and thus CFA of the first trampoline) from the stack slot.
126- # This stack slot is updated every time swap() is called to point to the bottom
127- # of the stack of the context switch just switched from.
128- .cfi_def_cfa %rbp, 16
129- .cfi_offset %rbp, -16
130-
131- # This nop is here so that the return address of the swap trampoline
132- # doesn't point to the start of the symbol. This confuses gdb's backtraces,
133- # causing them to think the parent function is trampoline_1 instead of
134- # trampoline_2.
135- nop
136-
137- # Call the provided function.
138- call *16(%rsp)
139- "#
140- : : : : "volatile" )
113+ asm ! (
114+ // Set up the second part of our DWARF CFI.
115+ // When unwinding the frame corresponding to this function, a DWARF unwinder
116+ // will restore %rbp (and thus CFA of the first trampoline) from the stack slot.
117+ // This stack slot is updated every time swap() is called to point to the bottom
118+ // of the stack of the context switch just switched from.
119+ ".cfi_def_cfa rbp, 16" ,
120+ ".cfi_offset rbp, -16" ,
121+ // This nop is here so that the return address of the swap trampoline
122+ // doesn't point to the start of the symbol. This confuses gdb's backtraces,
123+ // causing them to think the parent function is trampoline_1 instead of
124+ // trampoline_2.
125+ "nop" ,
126+ // Call the provided function.
127+ "call [rsp + 16]" ,
128+ ) ;
141129 }
142130
143131 unsafe fn push ( sp : & mut StackPointer , val : usize ) {
@@ -189,55 +177,67 @@ pub unsafe fn swap(
189177
190178 let mut ret: usize ;
191179 let mut ret_sp: * mut usize ;
192- llvm_asm ! (
193- r#"
194- # Push the return address
195- leaq 0f(%rip), %rax
196- pushq %rax
197-
198- # Save frame pointer explicitly; the unwinder uses it to find CFA of
199- # the caller, and so it has to have the correct value immediately after
200- # the call instruction that invoked the trampoline.
201- pushq %rbp
202-
203- # Link the call stacks together by writing the current stack bottom
204- # address to the CFA slot in the new stack.
205- movq %rsp, (%rcx)
206-
207- # Pass the stack pointer of the old context to the new one.
208- movq %rsp, %rsi
209-
210- # Load stack pointer of the new context.
211- movq %rdx, %rsp
212-
213- # Restore frame pointer of the new context.
214- popq %rbp
215-
216- # Return into the new context. Use `pop` and `jmp` instead of a `ret`
217- # to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
218- popq %rax
219- jmpq *%rax
220-
221- 0:
222- "#
223- : "={rdi}" ( ret)
224- "={rsi}" ( ret_sp)
225- : "{rdi}" ( arg)
226- "{rdx}" ( new_sp. 0 )
227- "{rcx}" ( new_cfa)
228- : "rax" , "rbx" , "rcx" , "rdx" , /*"rsi", "rdi", "rbp", "rsp",*/
229- "r8" , "r9" , "r10" , "r11" , "r12" , "r13" , "r14" , "r15" ,
230- "mm0" , "mm1" , "mm2" , "mm3" , "mm4" , "mm5" , "mm6" , "mm7" ,
231- "xmm0" , "xmm1" , "xmm2" , "xmm3" , "xmm4" , "xmm5" , "xmm6" , "xmm7" ,
232- "xmm8" , "xmm9" , "xmm10" , "xmm11" , "xmm12" , "xmm13" , "xmm14" , "xmm15" ,
233- "xmm16" , "xmm17" , "xmm18" , "xmm19" , "xmm20" , "xmm21" , "xmm22" , "xmm23" ,
234- "xmm24" , "xmm25" , "xmm26" , "xmm27" , "xmm28" , "xmm29" , "xmm30" , "xmm31" ,
235- "cc" , "dirflag" , "fpsr" , "flags" , "memory"
236- // Ideally, we would set the LLVM "noredzone" attribute on this function
237- // (and it would be propagated to the call site). Unfortunately, rustc
238- // provides no such functionality. Fortunately, by a lucky coincidence,
239- // the "alignstack" LLVM inline assembly option does exactly the same
240- // thing on x86_64.
241- : "volatile" , "alignstack" ) ;
180+
181+ asm ! (
182+ // Push the return address
183+ "lea rax, [rip + 0f]" ,
184+ "push rax" ,
185+ // Save frame pointer explicitly; the unwinder uses it to find CFA of
186+ // the caller, and so it has to have the correct value immediately after
187+ // the call instruction that invoked the trampoline.
188+ "push rbp" ,
189+ // Link the call stacks together by writing the current stack bottom
190+ // address to the CFA slot in the new stack.
191+ "mov [rcx], rsp" ,
192+ // Pass the stack pointer of the old context to the new one.
193+ "mov rsi, rsp" ,
194+ // Load stack pointer of the new context.
195+ "mov rsp, rdx" ,
196+ // Restore frame pointer of the new context.
197+ "pop rbp" ,
198+ // Return into the new context. Use `pop` and `jmp` instead of a `ret`
199+ // to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
200+ "pop rax" ,
201+ "jmp rax" ,
202+ "0:" ,
203+ // Outputs
204+ lateout( "rdi" ) ret,
205+ lateout( "rsi" ) ret_sp,
206+ // Inputs
207+ in( "rdi" ) arg,
208+ in( "rdx" ) new_sp. 0 ,
209+ in( "rcx" ) new_cfa,
210+ // Clobbers
211+ out( "rax" ) _, out( "rbx" ) _, lateout( "rcx" ) _, lateout( "rdx" ) _,
212+ out( "r8" ) _, out( "r9" ) _, out( "r10" ) _, out( "r11" ) _,
213+ out( "r12" ) _, out( "r13" ) _, out( "r14" ) _, out( "r15" ) _,
214+ /*
215+ TODO:
216+ out("mm0") _, out("mm1") _, out("mm2") _, out("mm3") _,
217+ out("mm4") _, out("mm5") _, out("mm6") _, out("mm7") _,
218+ */
219+ out( "xmm0" ) _, out( "xmm1" ) _, out( "xmm2" ) _, out( "xmm3" ) _,
220+ out( "xmm4" ) _, out( "xmm5" ) _, out( "xmm6" ) _, out( "xmm7" ) _,
221+ out( "xmm8" ) _, out( "xmm9" ) _, out( "xmm10" ) _, out( "xmm11" ) _,
222+ out( "xmm12" ) _, out( "xmm13" ) _, out( "xmm14" ) _, out( "xmm15" ) _,
223+ /*
224+ TODO:
225+ out("xmm16") _, out("xmm17") _, out("xmm18") _, out("xmm19") _,
226+ out("xmm20") _, out("xmm21") _, out("xmm22") _, out("xmm23") _,
227+ out("xmm24") _, out("xmm25") _, out("xmm26") _, out("xmm27") _,
228+ out("xmm28") _, out("xmm29") _, out("xmm30") _, out("xmm31") _,
229+ */
230+ /* Options:
231+ rustc emits the following clobbers,
232+ - by *not* specifying `options(preserves_flags)`:
233+ (x86) ~{dirflag},~{flags},~{fpsr}
234+ (ARM/AArch64) ~{cc}
235+ - by *not* specifying `options(nomem)`:
236+ ~{memory}
237+ - by *not* specifying `nostack`:
238+ alignstack
239+ */
240+ ) ;
241+
242242 ( ret, StackPointer ( ret_sp) )
243243}
0 commit comments