1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
| ; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: %bb22
; Load from %arg has alias store in Loop
; CHECK: flat_load_dword
; #####################################################################
; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i]
; CHECK: s_load_dword
; #####################################################################
; CHECK-LABEL: %bb11
; Load from %arg in a Loop body has alias store
; CHECK: flat_load_dword
; CHECK-LABEL: %bb20
; CHECK: flat_store_dword
define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
bb:
%tmp = sext i32 %arg2 to i64
%tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp
%tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4, !tbaa !0
%tmp5 = icmp sgt i32 %tmp4, 0
br i1 %tmp5, label %bb6, label %bb8
bb6: ; preds = %bb
br label %bb11
bb7: ; preds = %bb22
br label %bb8
bb8: ; preds = %bb7, %bb
%tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
%tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp
store i32 %tmp9, i32 addrspace(1)* %tmp10, align 4, !tbaa !0
ret void
bb11: ; preds = %bb22, %bb6
%tmp12 = phi i32 [ %tmp30, %bb22 ], [ 0, %bb6 ]
%tmp13 = phi i32 [ %tmp25, %bb22 ], [ 0, %bb6 ]
%tmp14 = srem i32 %tmp13, %arg2
%tmp15 = sext i32 %tmp14 to i64
%tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15
%tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0
%tmp18 = icmp sgt i32 %tmp17, 100
%tmp19 = sext i32 %tmp13 to i64
br i1 %tmp18, label %bb20, label %bb22
bb20: ; preds = %bb11
%tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19
store i32 0, i32 addrspace(1)* %tmp21, align 4, !tbaa !0
br label %bb22
bb22: ; preds = %bb20, %bb11
%tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp19
%tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4, !tbaa !0
%tmp25 = add nuw nsw i32 %tmp13, 1
%tmp26 = sext i32 %tmp25 to i64
%tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp26
%tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4, !tbaa !0
%tmp29 = add i32 %tmp24, %tmp12
%tmp30 = add i32 %tmp29, %tmp28
%tmp31 = icmp eq i32 %tmp25, %tmp4
br i1 %tmp31, label %bb7, label %bb11
}
; one more test to ensure that aliasing store after the load
; is considered clobbering if load parent block is the same
; as a loop header block.
; CHECK-LABEL: %bb1
; Load from %arg has alias store that is after the load
; but is considered clobbering because of the loop.
; CHECK: flat_load_dword
define amdgpu_kernel void @cfg_selfloop(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
bb:
br label %bb1
bb2:
ret void
bb1:
%tmp13 = phi i32 [ %tmp25, %bb1 ], [ 0, %bb ]
%tmp14 = srem i32 %tmp13, %arg2
%tmp15 = sext i32 %tmp14 to i64
%tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15
%tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0
%tmp19 = sext i32 %tmp13 to i64
%tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19
store i32 %tmp17, i32 addrspace(1)* %tmp21, align 4, !tbaa !0
%tmp25 = add nuw nsw i32 %tmp13, 1
%tmp31 = icmp eq i32 %tmp25, 100
br i1 %tmp31, label %bb2, label %bb1
}
attributes #0 = { "target-cpu"="fiji" }
!0 = !{!1, !1, i64 0}
!1 = !{!"int", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}
|