1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
; Test memcmp using CLC, with i32 results.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; External memcmp used by every test below: two i8 pointers plus an i64 byte
; count, returning an i32 that is marked signext.
declare signext i32 @memcmp(i8 *%src1, i8 *%src2, i64 %size)
; Zero-length comparisons should be optimized away.
define i32 @f1(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f1:
; CHECK: lhi %r2, 0
; CHECK: br %r14
  ; The length argument is 0; the expected output above simply loads 0
  ; into %r2 and returns.
  %result = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 0)
  ret i32 %result
}
; Check a case where the result is used as an integer.
define i32 @f2(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f2:
; CHECK: clc 0(2,%r3), 0(%r2)
; CHECK: ipm %r2
; CHECK: sll %r2, 2
; CHECK: sra %r2, 30
; CHECK: br %r14
  ; Two-byte comparison whose i32 result is returned unchanged.
  %order = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2)
  ret i32 %order
}
; Check a case where the result is tested for equality.
define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f3:
; CHECK: clc 0(3,%r3), 0(%r2)
; CHECK-NEXT: ber %r14
; CHECK: br %r14
  ; Three-byte comparison; the result is only compared against zero.
  %order = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3)
  %same = icmp eq i32 %order, 0
  br i1 %same, label %done, label %write

write:
  ; Taken when the memcmp result is nonzero.
  store i32 0, i32 *%dest
  br label %done

done:
  ret void
}
; Check a case where the result is tested for inequality.
define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f4:
; CHECK: clc 0(4,%r3), 0(%r2)
; CHECK-NEXT: blhr %r14
; CHECK: br %r14
entry:
  ; Four-byte comparison; the branch skips the store when the result is
  ; nonzero.
  %order = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 4)
  %differ = icmp ne i32 %order, 0
  br i1 %differ, label %done, label %write

write:
  ; Taken when the memcmp result is zero.
  store i32 0, i32 *%dest
  br label %done

done:
  ret void
}
; Check a case where the result is tested via slt.
define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f5:
; CHECK: clc 0(5,%r3), 0(%r2)
; CHECK-NEXT: bhr %r14
; CHECK: br %r14
entry:
  ; Five-byte comparison; only the "result < 0" outcome is inspected.
  %order = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5)
  %is_neg = icmp slt i32 %order, 0
  br i1 %is_neg, label %done, label %write

write:
  ; Taken when the memcmp result is not negative.
  store i32 0, i32 *%dest
  br label %done

done:
  ret void
}
; Check a case where the result is tested for sgt.
define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f6:
; CHECK: clc 0(6,%r3), 0(%r2)
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  ; Six-byte comparison; only the "result > 0" outcome is inspected.
  %order = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6)
  %is_pos = icmp sgt i32 %order, 0
  br i1 %is_pos, label %done, label %write

write:
  ; Taken when the memcmp result is not positive.
  store i32 0, i32 *%dest
  br label %done

done:
  ret void
}
; Check the upper end of the CLC range. Here the result is used both as
; an integer and for branching.
define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f7:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: ipm %r2
; CHECK: sll %r2, 2
; CHECK: sra %r2, 30
; CHECK: blr %r14
; CHECK: br %r14
entry:
  ; 256-byte comparison. %order is both tested for being negative and
  ; returned from the function.
  %order = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 256)
  %is_neg = icmp slt i32 %order, 0
  br i1 %is_neg, label %done, label %write

write:
  ; Taken when the memcmp result is not negative.
  store i32 0, i32 *%dest
  br label %done

done:
  ret i32 %order
}
; 257 bytes needs two CLCs.
define i32 @f8(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f8:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(1,%r3), 256(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  ; 257 bytes: the expected output above is a 256-byte CLC followed by a
  ; 1-byte CLC at offset 256.
  %order = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
  ret i32 %order
}
; Test a comparison of 257 bytes in which the CC result can be used directly.
define void @f9(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f9:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(1,%r3), 256(%r2)
; CHECK: [[LABEL]]:
; CHECK-NEXT: bhr %r14
; CHECK: br %r14
entry:
  ; 257-byte comparison (a 256-byte CLC plus a 1-byte CLC in the expected
  ; output); only the "result < 0" outcome is inspected.
  %order = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
  %is_neg = icmp slt i32 %order, 0
  br i1 %is_neg, label %done, label %write

write:
  ; Taken when the memcmp result is not negative.
  store i32 0, i32 *%dest
  br label %done

done:
  ret void
}
; Test the largest size that can use two CLCs.
define i32 @f10(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f10:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(256,%r3), 256(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  ; 512 bytes: the expected output above is two 256-byte CLCs.
  %order = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 512)
  ret i32 %order
}
; Test the smallest size that needs 3 CLCs.
define i32 @f11(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f11:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(256,%r3), 256(%r2)
; CHECK: jlh [[LABEL]]
; CHECK: clc 512(1,%r3), 512(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  ; 513 bytes: the expected output above is two 256-byte CLCs followed by
  ; a 1-byte CLC at offset 512.
  %order = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 513)
  ret i32 %order
}
; Test the largest size that can use 3 CLCs.
define i32 @f12(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f12:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(256,%r3), 256(%r2)
; CHECK: jlh [[LABEL]]
; CHECK: clc 512(256,%r3), 512(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  ; 768 bytes: the expected output above is three 256-byte CLCs.
  %order = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 768)
  ret i32 %order
}
; The next size up uses a loop instead. We leave the more complicated
; loop tests to memcpy-01.ll, which shares the same form.
define i32 @f13(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f13:
; CHECK: lghi [[COUNT:%r[0-5]]], 3
; CHECK: [[LOOP:.L[^:]*]]:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK-DAG: la %r2, 256(%r2)
; CHECK-DAG: la %r3, 256(%r3)
; CHECK: brctg [[COUNT]], [[LOOP]]
; CHECK: clc 0(1,%r3), 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  ; 769 bytes: the expected output above is a loop with a count of 3 around
  ; a 256-byte CLC (both pointers advanced by 256 each time), followed by
  ; a 1-byte CLC.
  %order = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769)
  ret i32 %order
}
|