1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
| ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test that a replicate of a load gets folded to vlrep also in cases where
; the load has multiple users.
; CHECK-NOT: vrep
define double @fun(double* %Vsrc, <2 x double> %T) {
entry:
%Vgep1 = getelementptr double, double* %Vsrc, i64 0
%Vld1 = load double, double* %Vgep1
%Vgep2 = getelementptr double, double* %Vsrc, i64 1
%Vld2 = load double, double* %Vgep2
%Vgep3 = getelementptr double, double* %Vsrc, i64 2
%Vld3 = load double, double* %Vgep3
%Vgep4 = getelementptr double, double* %Vsrc, i64 3
%Vld4 = load double, double* %Vgep4
%Vgep5 = getelementptr double, double* %Vsrc, i64 4
%Vld5 = load double, double* %Vgep5
%Vgep6 = getelementptr double, double* %Vsrc, i64 5
%Vld6 = load double, double* %Vgep6
%V19 = insertelement <2 x double> undef, double %Vld1, i32 0
%V20 = shufflevector <2 x double> %V19, <2 x double> undef, <2 x i32> zeroinitializer
%V21 = insertelement <2 x double> undef, double %Vld4, i32 0
%V22 = insertelement <2 x double> %V21, double %Vld5, i32 1
%V23 = fmul <2 x double> %V20, %V22
%V24 = fadd <2 x double> %T, %V23
%V25 = insertelement <2 x double> %V19, double %Vld2, i32 1
%V26 = insertelement <2 x double> undef, double %Vld6, i32 0
%V27 = insertelement <2 x double> %V26, double %Vld6, i32 1
%V28 = fmul <2 x double> %V25, %V27
%V29 = fadd <2 x double> %T, %V28
%V30 = insertelement <2 x double> undef, double %Vld2, i32 0
%V31 = shufflevector <2 x double> %V30, <2 x double> undef, <2 x i32> zeroinitializer
%V32 = insertelement <2 x double> undef, double %Vld5, i32 0
%V33 = insertelement <2 x double> %V32, double %Vld6, i32 1
%V34 = fmul <2 x double> %V31, %V33
%V35 = fadd <2 x double> %T, %V34
%V36 = insertelement <2 x double> undef, double %Vld3, i32 0
%V37 = shufflevector <2 x double> %V36, <2 x double> undef, <2 x i32> zeroinitializer
%V38 = fmul <2 x double> %V37, %V22
%V39 = fadd <2 x double> %T, %V38
%Vmul37 = fmul double %Vld3, %Vld6
%Vadd38 = fadd double %Vmul37, %Vmul37
%VA0 = fadd <2 x double> %V24, %V29
%VA1 = fadd <2 x double> %VA0, %V35
%VA2 = fadd <2 x double> %VA1, %V39
%VE0 = extractelement <2 x double> %VA2, i32 0
%VS1 = fadd double %VE0, %Vadd38
ret double %VS1
}
|