Skip to content

Commit 980890a

Browse files
committed
x86_64: Use feature(asm) instead of llvm_asm
1 parent e55d24c commit 980890a

File tree

1 file changed

+116
-116
lines changed

1 file changed

+116
-116
lines changed

src/arch/x86_64.rs

Lines changed: 116 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -63,81 +63,69 @@ pub unsafe fn init(
6363
#[cfg(not(target_vendor = "apple"))]
6464
#[naked]
6565
unsafe extern "C" fn trampoline_1() {
66-
llvm_asm!(
67-
r#"
68-
# gdb has a hardcoded check that rejects backtraces where frame addresses
69-
# do not monotonically decrease. It is turned off if the function is called
70-
# "__morestack" and that is hardcoded. So, to make gdb backtraces match
71-
# the actual unwinder behavior, we call ourselves "__morestack" and mark
72-
# the symbol as local; it shouldn't interfere with anything.
73-
__morestack:
74-
.local __morestack
75-
76-
# Set up the first part of our DWARF CFI linking stacks together. When
77-
# we reach this function from unwinding, %rbp will be pointing at the bottom
78-
# of the parent linked stack. This link is set each time swap() is called.
79-
# When unwinding the frame corresponding to this function, a DWARF unwinder
80-
# will use %rbp+16 as the next call frame address, restore return address
81-
# from CFA-8 and restore %rbp from CFA-16. This mirrors what the second half
82-
# of `swap_trampoline` does.
83-
.cfi_def_cfa %rbp, 16
84-
.cfi_offset %rbp, -16
85-
86-
# This nop is here so that the initial swap doesn't return to the start
87-
# of the trampoline, which confuses the unwinder since it will look for
88-
# frame information in the previous symbol rather than this one. It is
89-
# never actually executed.
90-
nop
91-
92-
# Stack unwinding in some versions of libunwind doesn't seem to like
93-
# 1-byte symbols, so we add a second nop here. This instruction isn't
94-
# executed either, it is only here to pad the symbol size.
95-
nop
96-
97-
.Lend:
98-
.size __morestack, .Lend-__morestack
99-
"#
100-
: : : : "volatile")
66+
asm!(
67+
// gdb has a hardcoded check that rejects backtraces where frame addresses
68+
// do not monotonically decrease. It is turned off if the function is called
69+
// "__morestack" and that is hardcoded. So, to make gdb backtraces match
70+
// the actual unwinder behavior, we call ourselves "__morestack" and mark
71+
// the symbol as local; it shouldn't interfere with anything.
72+
"__morestack:",
73+
".local __morestack",
74+
// Set up the first part of our DWARF CFI linking stacks together. When
75+
// we reach this function from unwinding, %rbp will be pointing at the bottom
76+
// of the parent linked stack. This link is set each time swap() is called.
77+
// When unwinding the frame corresponding to this function, a DWARF unwinder
78+
// will use %rbp+16 as the next call frame address, restore return address
79+
// from CFA-8 and restore %rbp from CFA-16. This mirrors what the second half
80+
// of `swap_trampoline` does.
81+
".cfi_def_cfa rbp, 16",
82+
".cfi_offset rbp, -16",
83+
// This nop is here so that the initial swap doesn't return to the start
84+
// of the trampoline, which confuses the unwinder since it will look for
85+
// frame information in the previous symbol rather than this one. It is
86+
// never actually executed.
87+
"nop",
88+
// Stack unwinding in some versions of libunwind doesn't seem to like
89+
// 1-byte symbols, so we add a second nop here. This instruction isn't
90+
// executed either, it is only here to pad the symbol size.
91+
"nop",
92+
".Lend:",
93+
".size __morestack, .Lend-__morestack",
94+
);
10195
}
10296

10397
#[cfg(target_vendor = "apple")]
10498
#[naked]
10599
unsafe extern "C" fn trampoline_1() {
106-
llvm_asm!(
107-
r#"
108-
# Identical to the above, except avoids .local/.size that aren't available on Mach-O.
109-
__morestack:
110-
.private_extern __morestack
111-
.cfi_def_cfa %rbp, 16
112-
.cfi_offset %rbp, -16
113-
nop
114-
nop
115-
"#
116-
: : : : "volatile")
100+
asm!(
101+
// Identical to the above, except it avoids the .local/.size directives, which aren't available on Mach-O.
102+
"__morestack:",
103+
".private_extern __morestack",
104+
".cfi_def_cfa rbp, 16",
105+
".cfi_offset rbp, -16",
106+
"nop",
107+
"nop",
108+
)
117109
}
118110

119111
#[naked]
120112
unsafe extern "C" fn trampoline_2() {
121-
llvm_asm!(
122-
r#"
123-
# Set up the second part of our DWARF CFI.
124-
# When unwinding the frame corresponding to this function, a DWARF unwinder
125-
# will restore %rbp (and thus CFA of the first trampoline) from the stack slot.
126-
# This stack slot is updated every time swap() is called to point to the bottom
127-
# of the stack of the context switch just switched from.
128-
.cfi_def_cfa %rbp, 16
129-
.cfi_offset %rbp, -16
130-
131-
# This nop is here so that the return address of the swap trampoline
132-
# doesn't point to the start of the symbol. This confuses gdb's backtraces,
133-
# causing them to think the parent function is trampoline_1 instead of
134-
# trampoline_2.
135-
nop
136-
137-
# Call the provided function.
138-
call *16(%rsp)
139-
"#
140-
: : : : "volatile")
113+
asm!(
114+
// Set up the second part of our DWARF CFI.
115+
// When unwinding the frame corresponding to this function, a DWARF unwinder
116+
// will restore %rbp (and thus CFA of the first trampoline) from the stack slot.
117+
// This stack slot is updated every time swap() is called to point to the bottom
118+
// of the stack of the context that was just switched from.
119+
".cfi_def_cfa rbp, 16",
120+
".cfi_offset rbp, -16",
121+
// This nop is here so that the return address of the swap trampoline
122+
// doesn't point to the start of the symbol. This confuses gdb's backtraces,
123+
// causing them to think the parent function is trampoline_1 instead of
124+
// trampoline_2.
125+
"nop",
126+
// Call the provided function.
127+
"call [rsp + 16]",
128+
);
141129
}
142130

143131
unsafe fn push(sp: &mut StackPointer, val: usize) {
@@ -189,55 +177,67 @@ pub unsafe fn swap(
189177

190178
let mut ret: usize;
191179
let mut ret_sp: *mut usize;
192-
llvm_asm!(
193-
r#"
194-
# Push the return address
195-
leaq 0f(%rip), %rax
196-
pushq %rax
197-
198-
# Save frame pointer explicitly; the unwinder uses it to find CFA of
199-
# the caller, and so it has to have the correct value immediately after
200-
# the call instruction that invoked the trampoline.
201-
pushq %rbp
202-
203-
# Link the call stacks together by writing the current stack bottom
204-
# address to the CFA slot in the new stack.
205-
movq %rsp, (%rcx)
206-
207-
# Pass the stack pointer of the old context to the new one.
208-
movq %rsp, %rsi
209-
210-
# Load stack pointer of the new context.
211-
movq %rdx, %rsp
212-
213-
# Restore frame pointer of the new context.
214-
popq %rbp
215-
216-
# Return into the new context. Use `pop` and `jmp` instead of a `ret`
217-
# to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
218-
popq %rax
219-
jmpq *%rax
220-
221-
0:
222-
"#
223-
: "={rdi}" (ret)
224-
"={rsi}" (ret_sp)
225-
: "{rdi}" (arg)
226-
"{rdx}" (new_sp.0)
227-
"{rcx}" (new_cfa)
228-
: "rax", "rbx", "rcx", "rdx", /*"rsi", "rdi", "rbp", "rsp",*/
229-
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
230-
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
231-
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
232-
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
233-
"xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
234-
"xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
235-
"cc", "dirflag", "fpsr", "flags", "memory"
236-
// Ideally, we would set the LLVM "noredzone" attribute on this function
237-
// (and it would be propagated to the call site). Unfortunately, rustc
238-
// provides no such functionality. Fortunately, by a lucky coincidence,
239-
// the "alignstack" LLVM inline assembly option does exactly the same
240-
// thing on x86_64.
241-
: "volatile", "alignstack");
180+
181+
asm!(
182+
// Push the return address
183+
"lea rax, [rip + 0f]",
184+
"push rax",
185+
// Save frame pointer explicitly; the unwinder uses it to find CFA of
186+
// the caller, and so it has to have the correct value immediately after
187+
// the call instruction that invoked the trampoline.
188+
"push rbp",
189+
// Link the call stacks together by writing the current stack bottom
190+
// address to the CFA slot in the new stack.
191+
"mov [rcx], rsp",
192+
// Pass the stack pointer of the old context to the new one.
193+
"mov rsi, rsp",
194+
// Load stack pointer of the new context.
195+
"mov rsp, rdx",
196+
// Restore frame pointer of the new context.
197+
"pop rbp",
198+
// Return into the new context. Use `pop` and `jmp` instead of a `ret`
199+
// to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
200+
"pop rax",
201+
"jmp rax",
202+
"0:",
203+
// Outputs
204+
lateout("rdi") ret,
205+
lateout("rsi") ret_sp,
206+
// Inputs
207+
in("rdi") arg,
208+
in("rdx") new_sp.0,
209+
in("rcx") new_cfa,
210+
// Clobbers
211+
out("rax") _, out("rbx") _, lateout("rcx") _, lateout("rdx") _,
212+
out("r8") _, out("r9") _, out("r10") _, out("r11") _,
213+
out("r12") _, out("r13") _, out("r14") _, out("r15") _,
214+
/*
215+
TODO:
216+
out("mm0") _, out("mm1") _, out("mm2") _, out("mm3") _,
217+
out("mm4") _, out("mm5") _, out("mm6") _, out("mm7") _,
218+
*/
219+
out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _,
220+
out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _,
221+
out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _,
222+
out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _,
223+
/*
224+
TODO:
225+
out("xmm16") _, out("xmm17") _, out("xmm18") _, out("xmm19") _,
226+
out("xmm20") _, out("xmm21") _, out("xmm22") _, out("xmm23") _,
227+
out("xmm24") _, out("xmm25") _, out("xmm26") _, out("xmm27") _,
228+
out("xmm28") _, out("xmm29") _, out("xmm30") _, out("xmm31") _,
229+
*/
230+
/* Options:
231+
rustc emits the following clobbers,
232+
- by *not* specifying `options(preserves_flags)`:
233+
(x86) ~{dirflag},~{flags},~{fpsr}
234+
(ARM/AArch64) ~{cc}
235+
- by *not* specifying `options(nomem)`:
236+
~{memory}
237+
- by *not* specifying `options(nostack)`:
238+
alignstack
239+
*/
240+
);
241+
242242
(ret, StackPointer(ret_sp))
243243
}

0 commit comments

Comments
 (0)