1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
| ; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; i and i + 1 can overflow in the following kernel:
; void test1(unsigned long long x, int *a, int *b) {
; for (unsigned i = 0; i < x; ++i)
; b[i] = a[i+1] + 1;
; }
;
; If accesses to a and b can alias, we need to emit a run-time alias check
; between accesses to a and b. However, when i and i + 1 can wrap, their
; SCEV expression is not an AddRec. We need to create SCEV predicates and
; coerce the expressions to AddRecs in order to be able to emit the run-time
; alias check.
;
; The accesses at b[i] and a[i+1] correspond to the addresses %arrayidx and
; %arrayidx4 in the test. The SCEV expressions for these are:
; ((4 * (zext i32 {1,+,1}<%for.body> to i64))<nuw><nsw> + %a)<nsw>
; ((4 * (zext i32 {0,+,1}<%for.body> to i64))<nuw><nsw> + %b)<nsw>
;
; The transformed expressions are:
; i64 {(4 + %a),+,4}<%for.body>
; i64 {(4 + %b),+,4}<%for.body>
; CHECK-LABEL: test1
; CHECK: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
; CHECK-NEXT: Against group
; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group
; CHECK-NEXT: (Low: (4 + %a) High: (4 + (4 * (1 umax %x)) + %a))
; CHECK-NEXT: Member: {(4 + %a),+,4}<%for.body>
; CHECK-NEXT: Group
; CHECK-NEXT: (Low: %b High: ((4 * (1 umax %x)) + %b))
; CHECK-NEXT: Member: {%b,+,4}<%for.body>
; CHECK: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
; CHECK: Expressions re-written:
; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom:
; CHECK-NEXT: ((4 * (zext i32 {1,+,1}<%for.body> to i64))<nuw><nsw> + %a)<nsw>
; CHECK-NEXT: --> {(4 + %a),+,4}<%for.body>
; CHECK-NEXT: [PSE] %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11:
; CHECK-NEXT: ((4 * (zext i32 {0,+,1}<%for.body> to i64))<nuw><nsw> + %b)<nsw>
; CHECK-NEXT: --> {%b,+,4}<%for.body>
define void @test1(i64 %x, i32* %a, i32* %b) {
entry:
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
%conv11 = phi i64 [ %conv, %for.body ], [ 0, %entry ]
%i.010 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%add = add i32 %i.010, 1
%idxprom = zext i32 %add to i64
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
%ld = load i32, i32* %arrayidx, align 4
%add2 = add nsw i32 %ld, 1
%arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11
store i32 %add2, i32* %arrayidx4, align 4
%conv = zext i32 %add to i64
%cmp = icmp ult i64 %conv, %x
br i1 %cmp, label %for.body, label %exit
exit:
ret void
}
; i can overflow in the following kernel:
; void test2(unsigned long long x, int *a) {
; for (unsigned i = 0; i < x; ++i)
; a[i] = a[i] + 1;
; }
;
; We need to check that i doesn't wrap, but we don't need a run-time alias
; check. We also need an extra no-wrap check to get the backedge taken count.
; CHECK-LABEL: test2
; CHECK: Memory dependences are safe
; CHECK: SCEV assumptions:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
define void @test2(i64 %x, i32* %a) {
entry:
br label %for.body
for.body:
%conv11 = phi i64 [ %conv, %for.body ], [ 0, %entry ]
%i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %conv11
%ld = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %ld, 1
store i32 %add, i32* %arrayidx, align 4
%inc = add i32 %i.010, 1
%conv = zext i32 %inc to i64
%cmp = icmp ult i64 %conv, %x
br i1 %cmp, label %for.body, label %exit
exit:
ret void
}
|