1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
| ; RUN: llc < %s -march=r600 -mcpu=redwood -r600-ir-structurize=0 | FileCheck %s
; Test case for a crash in the AMDILCFGStructurizer from a CFG like this:
;
; entry
; / \
; diamond_head branch_from
; / \ |
; diamond_false diamond_true
; \ /
; done
;
; When the diamond_true branch had more than 100 instructions.
;
;
; CHECK-LABEL: {{^}}branch_into_diamond:
; === entry block:
; CHECK: ALU_PUSH_BEFORE
; === Branch instruction (IF):
; CHECK: JUMP
; === branch_from block
; CHECK: ALU
; === Duplicated diamond_true block (There can be more than one ALU clause):
; === XXX: We should be able to optimize this so the basic block is not
; === duplicated. See comments in
; === AMDGPUCFGStructurizer::improveSimpleJumpintoIf()
; CHECK: ALU
; === Branch instruction (ELSE):
; CHECK: ELSE
; === diamond_head block:
; CHECK: ALU_PUSH_BEFORE
; === Branch instruction (IF):
; CHECK: JUMP
; === diamond_true block (There can be more than one ALU clause):
; ALU
; === Branch instruction (ELSE):
; CHECK: ELSE
; === diamond_false block plus implicit ENDIF
; CHECK: ALU_POP_AFTER
; === Branch instruction (ENDIF):
; CHECK: POP
; === done block:
; CHECK: ALU
; CHECK: MEM_RAT_CACHELESS
; CHECK: CF_END
define amdgpu_kernel void @branch_into_diamond(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%0 = icmp ne i32 %a, 0
br i1 %0, label %diamond_head, label %branch_from
diamond_head:
%1 = icmp ne i32 %a, 1
br i1 %1, label %diamond_true, label %diamond_false
branch_from:
%2 = add i32 %a, 1
br label %diamond_true
diamond_false:
%3 = add i32 %a, 2
br label %done
diamond_true:
%4 = phi i32 [%2, %branch_from], [%a, %diamond_head]
; This block needs to be > 100 ISA instructions to hit the bug,
; so we'll use udiv instructions.
%div0 = udiv i32 %a, %b
%div1 = udiv i32 %div0, %4
%div2 = udiv i32 %div1, 11
%div3 = udiv i32 %div2, %a
%div4 = udiv i32 %div3, %b
%div5 = udiv i32 %div4, %c
%div6 = udiv i32 %div5, %div0
%div7 = udiv i32 %div6, %div1
br label %done
done:
%5 = phi i32 [%3, %diamond_false], [%div7, %diamond_true]
store i32 %5, i32 addrspace(1)* %out
ret void
}
|