1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR38376.
; RUN: llc < %s -mtriple=x86_64-pc-win32-coreclr -verify-machineinstrs=0 | FileCheck %s -check-prefix=WIN_X64
; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=LINUX
; By default, Windows CoreCLR requires an inline prologue stack expansion check
; if 4096 bytes or more are allocated on the stack.
; Prolog stack allocation >= 4096 bytes will require the probe sequence
define i32 @main4k() nounwind {
entry:
; Allocates exactly 4096 bytes of stack and returns 0. The CoreCLR run pins
; the complete inline probe sequence; the Linux run must contain no probe.
; WIN_X64-LABEL:main4k:
; WIN_X64: # %bb.0:
; %eax holds the allocation size and %rcx is zeroed. %rdx becomes the
; candidate stack pointer (%rsp - size), replaced by zero through cmovb if
; the subtraction borrows.
; WIN_X64: movl $4096, %eax
; WIN_X64: xorq %rcx, %rcx
; WIN_X64: movq %rsp, %rdx
; WIN_X64: subq %rax, %rdx
; WIN_X64: cmovbq %rcx, %rdx
; NOTE(review): %gs:16 is presumably the thread's stack limit in the Windows
; TEB; confirm against the NT_TIB layout.
; The probe loop is skipped (jae, unsigned) when the candidate stack pointer
; in %rdx is not below the value loaded into %rcx.
; WIN_X64: movq %gs:16, %rcx
; WIN_X64: cmpq %rcx, %rdx
; WIN_X64: jae .LBB0_3
; WIN_X64:# %bb.1:
; Round the target down to a 4096-byte boundary, then walk %rcx downwards in
; 4096-byte steps, storing a zero byte at each step, until it equals the
; rounded target.
; WIN_X64: andq $-4096, %rdx
; WIN_X64:.LBB0_2:
; WIN_X64: addq $-4096, %rcx
; WIN_X64: movb $0, (%rcx)
; WIN_X64: cmpq %rcx, %rdx
; WIN_X64: jne .LBB0_2
; WIN_X64:.LBB0_3:
; The allocation itself: subtract the size from %rsp. The remaining lines are
; the zero return value and the matching 4096-byte release before returning.
; WIN_X64: subq %rax, %rsp
; WIN_X64: xorl %eax, %eax
; WIN_X64: addq $4096, %rsp
; WIN_X64: retq
; The Linux run must reach the return without ever loading from %gs:16.
; LINUX-LABEL:main4k:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
%a = alloca [4096 x i8]
ret i32 0
}
; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with frame pointer
define i32 @main4k_frame() nounwind "no-frame-pointer-elim"="true" {
entry:
; Same 4096-byte allocation as the plain case, but with the
; "no-frame-pointer-elim"="true" attribute set on the function.
; For the CoreCLR run only the presence of the %gs:16 load is checked, not
; the whole probe sequence.
; WIN_X64-LABEL:main4k_frame:
; WIN_X64: movq %gs:16, %rcx
; The Linux run must reach the return without loading from %gs:16.
; LINUX-LABEL:main4k_frame:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
%a = alloca [4096 x i8]
ret i32 0
}
; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with INT args
define i32 @main4k_intargs(i32 %x, i32 %y) nounwind {
entry:
; Two integer arguments (summed for the return value) plus a 4096-byte
; alloca. The label directives anchor every check to this function's own
; output, so a match inside a neighbouring function cannot satisfy them.
; The CoreCLR run must load from %gs:16 somewhere in this function.
; WIN_X64-LABEL:main4k_intargs:
; WIN_X64: movq %gs:16, %rcx
; The Linux run must reach the return without loading from %gs:16.
; LINUX-LABEL:main4k_intargs:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
%a = alloca [4096 x i8]
%t = add i32 %x, %y
ret i32 %t
}
; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with FP regs
define i32 @main4k_fpargs(double %x, double %y) nounwind {
entry:
; Two double arguments (unused) plus a 4096-byte alloca. The label directives
; anchor every check to this function's own output, so a match inside a
; neighbouring function cannot satisfy them.
; The CoreCLR run must load from %gs:16 somewhere in this function.
; WIN_X64-LABEL:main4k_fpargs:
; WIN_X64: movq %gs:16, %rcx
; The Linux run must reach the return without loading from %gs:16.
; LINUX-LABEL:main4k_fpargs:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
%a = alloca [4096 x i8]
ret i32 0
}
; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with mixed regs
define i32 @main4k_mixargs(double %x, i32 %y) nounwind {
entry:
; One double and one integer argument (the integer is returned) plus a
; 4096-byte alloca. The label directives anchor every check to this
; function's own output, so a match inside a neighbouring function cannot
; satisfy them.
; The CoreCLR run must load from %gs:16 somewhere in this function.
; WIN_X64-LABEL:main4k_mixargs:
; WIN_X64: movq %gs:16, %rcx
; The Linux run must reach the return without loading from %gs:16.
; LINUX-LABEL:main4k_mixargs:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
%a = alloca [4096 x i8]
ret i32 %y
}
; Make sure we don't emit the probe for a smaller prolog stack allocation.
define i32 @main128() nounwind {
entry:
; Allocates only 128 bytes, so neither run may contain the %gs:16 load.
; The label directives confine each negative check to the text between this
; function's label and its return; without them the negative check would
; start from the previous positive match in an earlier function and stop at
; that function's return, never inspecting this one.
; WIN_X64-LABEL:main128:
; WIN_X64-NOT: movq %gs:16, %rcx
; WIN_X64: retq
; LINUX-LABEL:main128:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
%a = alloca [128 x i8]
ret i32 0
}
; Make sure we don't emit the probe sequence if not on Windows even if the
; function has the Win64 calling convention.
define win64cc i32 @main4k_win64() nounwind {
entry:
; A win64cc function with a 4096-byte alloca. The label directives anchor
; every check to this function's own output, so a match inside a
; neighbouring function cannot satisfy them.
; The CoreCLR run must load from %gs:16 somewhere in this function.
; WIN_X64-LABEL:main4k_win64:
; WIN_X64: movq %gs:16, %rcx
; The Linux run must reach the return without loading from %gs:16, despite
; the calling convention on the function.
; LINUX-LABEL:main4k_win64:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
%a = alloca [4096 x i8]
ret i32 0
}
; External function taking a byte pointer; @main4k_alloca passes each of its
; allocas to it and sums the two results.
declare i32 @bar(i8*) nounwind
; Within-body inline probe expansion
define win64cc i32 @main4k_alloca(i64 %n) nounwind {
entry:
; A fixed 1024-byte alloca is passed to @bar, then an alloca of %n bytes is
; created and passed to @bar as well. The label directives anchor every
; check to this function's own output.
; The CoreCLR run must load from %gs:16 between the two calls.
; WIN_X64-LABEL:main4k_alloca:
; WIN_X64: callq bar
; WIN_X64: movq %gs:16, [[R:%r.*]]
; WIN_X64: callq bar
; The Linux run must have no load from %gs:16 between the two calls.
; LINUX-LABEL:main4k_alloca:
; LINUX: callq bar
; LINUX-NOT: movq %gs:16, [[R:%r.*]]
; LINUX: callq bar
%a = alloca i8, i64 1024
%ra = call i32 @bar(i8* %a) nounwind
%b = alloca i8, i64 %n
%rb = call i32 @bar(i8* %b) nounwind
%r = add i32 %ra, %rb
ret i32 %r
}
; Influence of stack-probe-size attribute
; Note this is not exposed in coreclr
define i32 @test_probe_size() "stack-probe-size"="8192" nounwind {
; A 4096-byte alloca in a function whose "stack-probe-size" attribute is
; 8192; neither run may contain the %gs:16 load.
; The label directives confine each negative check to the text between this
; function's label and its return; without them the negative check would
; only cover the tail of the preceding function.
; WIN_X64-LABEL:test_probe_size:
; WIN_X64-NOT: movq %gs:16, %rcx
; WIN_X64: retq
; LINUX-LABEL:test_probe_size:
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
%a = alloca [4096 x i8]
ret i32 0
}
|