1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
| ; RUN: llc -verify-machineinstrs -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s
define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) {
; CHECK-LABEL: test_vmovs_via_vext_lane0to0:
%vec = insertelement <2 x float> %in, float %arg, i32 0
%res = fadd <2 x float> %vec, %vec
; CHECK: vext.32 d1, d1, d0, #1
; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
ret <2 x float> %res
}
define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) {
; CHECK-LABEL: test_vmovs_via_vext_lane0to1:
%vec = insertelement <2 x float> %in, float %arg, i32 1
%res = fadd <2 x float> %vec, %vec
; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vext.32 d1, d1, d0, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
ret <2 x float> %res
}
define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) {
; CHECK-LABEL: test_vmovs_via_vext_lane1to0:
%vec = insertelement <2 x float> %in, float %arg, i32 0
%res = fadd <2 x float> %vec, %vec
; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vext.32 d1, d0, d1, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
ret <2 x float> %res
}
define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) {
; CHECK-LABEL: test_vmovs_via_vext_lane1to1:
%vec = insertelement <2 x float> %in, float %arg, i32 1
%res = fadd <2 x float> %vec, %vec
; CHECK: vext.32 d1, d0, d1, #1
; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
ret <2 x float> %res
}
define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) {
; CHECK-LABEL: test_vmovs_via_vdup:
; Do an operation (which will end up NEON because of +neonfp) to convince the
; execution-domain pass that NEON is a good thing to use.
%res = fadd float %ret, %ret
; It makes sense for LLVM to do the addition in d0 here, because it's going
; to be returned. This means it will want a "vmov s0, s1":
; CHECK: vdup.32 d0, d0[1]
ret float %res
}
declare float @llvm.sqrt.f32(float)
declare void @bar()
; This is a comp
define float @test_ineligible(float, float %in) {
; CHECK-LABEL: test_ineligible:
%sqrt = call float @llvm.sqrt.f32(float %in)
%val = fadd float %sqrt, %sqrt
; This call forces a move from a callee-saved register to the return-reg. That
; move is not eligible for conversion to a d-register instructions because the
; use-def chains would be messed up. Primarily a compile-test (we used to
; internal fault).
call void @bar()
; CHECK: bl bar
; CHECK: vext.32
; CHECK: vext.32
ret float %val
}
define i32 @test_vmovs_no_sreg(i32 %in) {
; CHECK-LABEL: test_vmovs_no_sreg:
; Check that the movement to and from GPRs takes place in the NEON domain.
; CHECK: vmov.32 d
%x = bitcast i32 %in to float
%res = fadd float %x, %x
; CHECK: vmov.32 r{{[0-9]+}}, d
%resi = bitcast float %res to i32
ret i32 %resi
}
; The point of this test is:
; + Make sure s1 is live before the BL
; + Make sure s1 is clobbered by the BL
; + Convince LLVM to emit a VMOV to S0
; + Convince LLVM to domain-convert this.
; When all of those are satisfied, LLVM should *not* mark s1 as an implicit-use
; because it's dead.
declare float @clobbers_s1(float, float)
define <2 x float> @test_clobbers_recognised(<2 x float> %invec, float %val) {
%elt = call float @clobbers_s1(float %val, float %val)
%vec = insertelement <2 x float> %invec, float %elt, i32 0
%res = fadd <2 x float> %vec, %vec
ret <2 x float> %res
}
|