1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
; Test various target-specific DAG combiner patterns.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; f1: an element extraction that is immediately truncated should behave like
; a bitcast, so the narrow value can be stored straight from the vector
; register.
define void @f1(<4 x i32> %v1, <4 x i32> %v2, i8 *%ptr1, i8 *%ptr2) {
; CHECK-LABEL: f1:
; CHECK: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vsteb [[REG]], 0(%r2), 3
; CHECK-DAG: vsteb [[REG]], 0(%r3), 15
; CHECK: br %r14
  ; The expected stores name bytes 3 and 15 of the sum register, which are
  ; the last bytes of 32-bit elements 0 and 3 respectively.
  %sum = add <4 x i32> %v1, %v2
  %first = extractelement <4 x i32> %sum, i32 0
  %last = extractelement <4 x i32> %sum, i32 3
  %first.lo = trunc i32 %first to i8
  %last.lo = trunc i32 %last to i8
  store i8 %first.lo, i8 *%ptr1
  store i8 %last.lo, i8 *%ptr2
  ret void
}
; f2: a pack-style shuffle (words 1 and 3 of each input) whose result is only
; read halfword-by-halfword should not require a pack instruction.
define i16 @f2(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: f2:
; CHECK-NOT: vpk
; CHECK-DAG: vaf [[REG1:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vaf [[REG2:%v[0-9]+]], %v26, %v28
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG1]], 3
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG2]], 7
; CHECK: br %r14
  ; Halfword 1 of the shuffle result is the second half of word 1 of the
  ; first sum (its halfword 3); halfword 7 is the second half of word 3 of
  ; the second sum (its halfword 7). The expected vlgvh operands use exactly
  ; those source registers and indices.
  %lhs = add <4 x i32> %v1, %v2
  %rhs = add <4 x i32> %v2, %v3
  %packed = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %halves = bitcast <4 x i32> %packed to <8 x i16>
  %h1 = extractelement <8 x i16> %halves, i32 1
  %h7 = extractelement <8 x i16> %halves, i32 7
  %total = add i16 %h1, %h7
  ret i16 %total
}
; f3: same pack-style shuffle as f2, but the second shuffle input is a splat
; of doubleword 0 of %v3 reinterpreted as words. Neither the splat nor the
; pack should survive.
define i16 @f3(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f3:
; CHECK-NOT: vrepg
; CHECK-NOT: vpk
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  ; Halfword 2 of the shuffle result is the first half of word 3 of the sum
  ; (its halfword 6). Halfword 7 comes from word 3 of the splat, i.e. the
  ; second word of doubleword 0 of %v3, whose second half is halfword 3.
  %sum = add <4 x i32> %v1, %v2
  %dup = shufflevector <2 x i64> %v3, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %dup.words = bitcast <2 x i64> %dup to <4 x i32>
  %packed = shufflevector <4 x i32> %sum, <4 x i32> %dup.words, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %halves = bitcast <4 x i32> %packed to <8 x i16>
  %h2 = extractelement <8 x i16> %halves, i32 2
  %h7 = extractelement <8 x i16> %halves, i32 7
  %total = add i16 %h2, %h7
  ret i16 %total
}
; f4: like f3, but the shuffle interleaves words 2 and 3 of the two inputs
; (a merge-low pattern) rather than packing them.
define i16 @f4(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f4:
; CHECK-NOT: vrepg
; CHECK-NOT: vmr
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  ; Halfword 4 of the shuffle result is the first half of word 3 of the sum
  ; (its halfword 6). Halfword 7 comes from word 3 of the splat, i.e. the
  ; second word of doubleword 0 of %v3, whose second half is halfword 3.
  %sum = add <4 x i32> %v1, %v2
  %dup = shufflevector <2 x i64> %v3, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %dup.words = bitcast <2 x i64> %dup to <4 x i32>
  %merged = shufflevector <4 x i32> %sum, <4 x i32> %dup.words, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %halves = bitcast <4 x i32> %merged to <8 x i16>
  %h4 = extractelement <8 x i16> %halves, i32 4
  %h7 = extractelement <8 x i16> %halves, i32 7
  %total = add i16 %h4, %h7
  ret i16 %total
}
; f5: like f4, but the shuffle interleaves words 0 and 1 of the two inputs
; (a merge-high pattern).
define i16 @f5(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f5:
; CHECK-NOT: vrepg
; CHECK-NOT: vmr
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 2
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  ; Halfword 4 of the shuffle result is the first half of word 1 of the sum
  ; (its halfword 2). Halfword 7 comes from word 1 of the splat, i.e. the
  ; second word of doubleword 0 of %v3, whose second half is halfword 3.
  %sum = add <4 x i32> %v1, %v2
  %dup = shufflevector <2 x i64> %v3, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %dup.words = bitcast <2 x i64> %dup to <4 x i32>
  %merged = shufflevector <4 x i32> %sum, <4 x i32> %dup.words, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %halves = bitcast <4 x i32> %merged to <8 x i16>
  %h4 = extractelement <8 x i16> %halves, i32 4
  %h7 = extractelement <8 x i16> %halves, i32 7
  %total = add i16 %h4, %h7
  ret i16 %total
}
; f6: a load followed by a sign extension normally needs an unpack-high step.
; When every use truncates the widened element back to a byte, that unpack
; should be dropped.
define void @f6(<8 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
; CHECK-LABEL: f6:
; CHECK: vlrepg [[REG:%v[0-9]+]], 0(%r2)
; CHECK-NOT: vup
; CHECK-DAG: vsteb [[REG]], 0(%r3), 1
; CHECK-DAG: vsteb [[REG]], 0(%r4), 2
; CHECK-DAG: vsteb [[REG]], 0(%r5), 7
; CHECK: br %r14
  ; Elements 1, 2 and 7 of the widened vector are truncated back to i8, and
  ; the expected stores take bytes 1, 2 and 7 of the loaded register itself.
  %bytes = load <8 x i8>, <8 x i8> *%ptr1
  %wide = sext <8 x i8> %bytes to <8 x i16>
  %w1 = extractelement <8 x i16> %wide, i32 1
  %w2 = extractelement <8 x i16> %wide, i32 2
  %w7 = extractelement <8 x i16> %wide, i32 7
  %b1 = trunc i16 %w1 to i8
  %b2 = trunc i16 %w2 to i8
  %b7 = trunc i16 %w7 to i8
  store i8 %b1, i8 *%ptr2
  store i8 %b2, i8 *%ptr3
  store i8 %b7, i8 *%ptr4
  ret void
}
; f7: same idea as f6, but with a bitcast in between the sign extension and
; the element extractions.
define void @f7(<4 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
; CHECK-LABEL: f7:
; CHECK: vlrepf [[REG:%v[0-9]+]], 0(%r2)
; CHECK-NOT: vup
; CHECK-DAG: vsteb [[REG]], 0(%r3), 0
; CHECK-DAG: vsteb [[REG]], 0(%r4), 1
; CHECK-DAG: vsteb [[REG]], 0(%r5), 3
; CHECK: br %r14
  ; Halfwords 1, 3 and 7 of the reinterpreted vector are the second halves of
  ; sign-extended words 0, 1 and 3; after truncation to i8 the expected
  ; stores take bytes 0, 1 and 3 of the loaded register.
  %bytes = load <4 x i8>, <4 x i8> *%ptr1
  %wide = sext <4 x i8> %bytes to <4 x i32>
  %halves = bitcast <4 x i32> %wide to <8 x i16>
  %h1 = extractelement <8 x i16> %halves, i32 1
  %h3 = extractelement <8 x i16> %halves, i32 3
  %h7 = extractelement <8 x i16> %halves, i32 7
  %b0 = trunc i16 %h1 to i8
  %b1 = trunc i16 %h3 to i8
  %b3 = trunc i16 %h7 to i8
  store i8 %b0, i8 *%ptr2
  store i8 %b1, i8 *%ptr3
  store i8 %b3, i8 *%ptr4
  ret void
}
|