1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
| ; Test vector insertions of byte-swapped memory values into 0.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
; Test VLLEBRZH.
define <8 x i16> @f1(i16 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vllebrzh %v24, 0(%r2)
; CHECK: br %r14
%val = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %val)
%ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
ret <8 x i16> %ret
}
; Test VLLEBRZH using a vector bswap.
define <8 x i16> @f2(i16 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vllebrzh %v24, 0(%r2)
; CHECK: br %r14
%val = load i16, i16 *%ptr
%insert = insertelement <8 x i16> zeroinitializer, i16 %val, i32 3
%ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert)
ret <8 x i16> %ret
}
; Test VLLEBRZF.
define <4 x i32> @f3(i32 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vllebrzf %v24, 0(%r2)
; CHECK: br %r14
%val = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %val)
%ret = insertelement <4 x i32> zeroinitializer, i32 %swap, i32 1
ret <4 x i32> %ret
}
; Test VLLEBRZF using a vector bswap.
define <4 x i32> @f4(i32 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vllebrzf %v24, 0(%r2)
; CHECK: br %r14
%val = load i32, i32 *%ptr
%insert = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
ret <4 x i32> %ret
}
; Test VLLEBRZG.
define <2 x i64> @f5(i64 *%ptr) {
; CHECK-LABEL: f5:
; CHECK: vllebrzg %v24, 0(%r2)
; CHECK: br %r14
%val = load i64, i64 *%ptr
%swap = call i64 @llvm.bswap.i64(i64 %val)
%ret = insertelement <2 x i64> zeroinitializer, i64 %swap, i32 0
ret <2 x i64> %ret
}
; Test VLLEBRZG using a vector bswap.
define <2 x i64> @f6(i64 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vllebrzg %v24, 0(%r2)
; CHECK: br %r14
%val = load i64, i64 *%ptr
%insert = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0
%ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert)
ret <2 x i64> %ret
}
; Test VLLEBRZE.
define <4 x i32> @f7(i32 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: vllebrze %v24, 0(%r2)
; CHECK: br %r14
%val = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %val)
%ret = insertelement <4 x i32> zeroinitializer, i32 %swap, i32 0
ret <4 x i32> %ret
}
; Test VLLEBRZE using a vector bswap.
define <4 x i32> @f8(i32 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: vllebrze %v24, 0(%r2)
; CHECK: br %r14
%val = load i32, i32 *%ptr
%insert = insertelement <4 x i32> zeroinitializer, i32 %val, i32 0
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
ret <4 x i32> %ret
}
; Test VLLEBRZH with the highest in-range offset.
define <8 x i16> @f9(i16 *%base) {
; CHECK-LABEL: f9:
; CHECK: vllebrzh %v24, 4094(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i64 2047
%val = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %val)
%ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
ret <8 x i16> %ret
}
; Test VLLEBRZH with the next highest offset.
define <8 x i16> @f10(i16 *%base) {
; CHECK-LABEL: f10:
; CHECK-NOT: vllebrzh %v24, 4096(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i64 2048
%val = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %val)
%ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
ret <8 x i16> %ret
}
; Test that VLLEBRZH allows an index.
define <8 x i16> @f11(i16 *%base, i64 %index) {
; CHECK-LABEL: f11:
; CHECK: sllg [[REG:%r[1-5]]], %r3, 1
; CHECK: vllebrzh %v24, 0([[REG]],%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i64 %index
%val = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %val)
%ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
ret <8 x i16> %ret
}
|