# RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon --disassemble -output-asm-variant=1 < %s | FileCheck %s
# RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon,v8.2a,fullfp16 --disassemble -output-asm-variant=1 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16
#-----------------------------------------------------------------------------
# Floating-point arithmetic
#-----------------------------------------------------------------------------
# Layout used throughout this section: a group is three 4-byte encodings that
# differ only in their third byte, followed by three expectation lines naming
# the h-, s- and d-register forms in that same order.  The h-register line
# carries a different prefix from the s/d lines; of the two invocations at the
# top of this file, only the one that adds fullfp16 hands that prefix to
# FileCheck, so the h-register forms are verified by that invocation alone.
0x41 0xc0 0xe0 0x1e
0x41 0xc0 0x20 0x1e
0x41 0xc0 0x60 0x1e
# FP16: fabs h1, h2
# CHECK: fabs s1, s2
# CHECK: fabs d1, d2
0x41 0x28 0xe3 0x1e
0x41 0x28 0x23 0x1e
0x41 0x28 0x63 0x1e
# FP16: fadd h1, h2, h3
# CHECK: fadd s1, s2, s3
# CHECK: fadd d1, d2, d3
0x41 0x18 0xe3 0x1e
0x41 0x18 0x23 0x1e
0x41 0x18 0x63 0x1e
# FP16: fdiv h1, h2, h3
# CHECK: fdiv s1, s2, s3
# CHECK: fdiv d1, d2, d3
# Four-operand forms: the fourth byte is 0x1f here instead of 0x1e.
0x41 0x10 0xc3 0x1f
0x41 0x10 0x03 0x1f
0x41 0x10 0x43 0x1f
# FP16: fmadd h1, h2, h3, h4
# CHECK: fmadd s1, s2, s3, s4
# CHECK: fmadd d1, d2, d3, d4
# Two triples back to back (second byte 0x48, then 0x68); their six
# expectation lines follow together: fmax h/s/d, then fmaxnm h/s/d.
0x41 0x48 0xe3 0x1e
0x41 0x48 0x23 0x1e
0x41 0x48 0x63 0x1e
0x41 0x68 0xe3 0x1e
0x41 0x68 0x23 0x1e
0x41 0x68 0x63 0x1e
# FP16: fmax h1, h2, h3
# CHECK: fmax s1, s2, s3
# CHECK: fmax d1, d2, d3
# FP16: fmaxnm h1, h2, h3
# CHECK: fmaxnm s1, s2, s3
# CHECK: fmaxnm d1, d2, d3
# Same two-triple arrangement (second byte 0x58, then 0x78): fmin h/s/d,
# then fminnm h/s/d.
0x41 0x58 0xe3 0x1e
0x41 0x58 0x23 0x1e
0x41 0x58 0x63 0x1e
0x41 0x78 0xe3 0x1e
0x41 0x78 0x23 0x1e
0x41 0x78 0x63 0x1e
# FP16: fmin h1, h2, h3
# CHECK: fmin s1, s2, s3
# CHECK: fmin d1, d2, d3
# FP16: fminnm h1, h2, h3
# CHECK: fminnm s1, s2, s3
# CHECK: fminnm d1, d2, d3
# Differs from the fmadd triple above only in the second byte (0x90 vs 0x10).
0x41 0x90 0xc3 0x1f
0x41 0x90 0x03 0x1f
0x41 0x90 0x43 0x1f
# FP16: fmsub h1, h2, h3, h4
# CHECK: fmsub s1, s2, s3, s4
# CHECK: fmsub d1, d2, d3, d4
0x41 0x08 0xe3 0x1e
0x41 0x08 0x23 0x1e
0x41 0x08 0x63 0x1e
# FP16: fmul h1, h2, h3
# CHECK: fmul s1, s2, s3
# CHECK: fmul d1, d2, d3
0x41 0x40 0xe1 0x1e
0x41 0x40 0x21 0x1e
0x41 0x40 0x61 0x1e
# FP16: fneg h1, h2
# CHECK: fneg s1, s2
# CHECK: fneg d1, d2
# The negated four-operand forms use third bytes 0xe3/0x23/0x63 where
# fmadd/fmsub use 0xc3/0x03/0x43; second byte 0x10 vs 0x90 again separates
# the add and subtract variants.
0x41 0x10 0xe3 0x1f
0x41 0x10 0x23 0x1f
0x41 0x10 0x63 0x1f
# FP16: fnmadd h1, h2, h3, h4
# CHECK: fnmadd s1, s2, s3, s4
# CHECK: fnmadd d1, d2, d3, d4
0x41 0x90 0xe3 0x1f
0x41 0x90 0x23 0x1f
0x41 0x90 0x63 0x1f
# FP16: fnmsub h1, h2, h3, h4
# CHECK: fnmsub s1, s2, s3, s4
# CHECK: fnmsub d1, d2, d3, d4
0x41 0x88 0xe3 0x1e
0x41 0x88 0x23 0x1e
0x41 0x88 0x63 0x1e
# FP16: fnmul h1, h2, h3
# CHECK: fnmul s1, s2, s3
# CHECK: fnmul d1, d2, d3
0x41 0xc0 0xe1 0x1e
0x41 0xc0 0x21 0x1e
0x41 0xc0 0x61 0x1e
# FP16: fsqrt h1, h2
# CHECK: fsqrt s1, s2
# CHECK: fsqrt d1, d2
0x41 0x38 0xe3 0x1e
0x41 0x38 0x23 0x1e
0x41 0x38 0x63 0x1e
# FP16: fsub h1, h2, h3
# CHECK: fsub s1, s2, s3
# CHECK: fsub d1, d2, d3
#-----------------------------------------------------------------------------
# Floating-point comparison
#-----------------------------------------------------------------------------
# Conditional compares: two triples of encodings (first byte 0x20, then 0x30),
# each triple varying only the third byte.  The six expectation lines come
# after both triples: fccmp h/s/d, then fccmpe h/s/d.
0x20 0x04 0xe2 0x1e
0x20 0x04 0x22 0x1e
0x20 0x04 0x62 0x1e
0x30 0x04 0xe2 0x1e
0x30 0x04 0x22 0x1e
0x30 0x04 0x62 0x1e
# FP16: fccmp h1, h2, #0, eq
# CHECK: fccmp s1, s2, #0, eq
# CHECK: fccmp d1, d2, #0, eq
# FP16: fccmpe h1, h2, #0, eq
# CHECK: fccmpe s1, s2, #0, eq
# CHECK: fccmpe d1, d2, #0, eq
# Plain compares: four triples, first byte 0x20 / 0x28 / 0x30 / 0x38.  The
# twelve expectation lines below pair with them in order: fcmp with a
# register, fcmp with #0.0, fcmpe with a register, fcmpe with #0.0.  The
# #0.0 triples use third bytes 0xe0/0x20/0x60 where the register triples use
# 0xe2/0x22/0x62.
0x20 0x20 0xe2 0x1e
0x20 0x20 0x22 0x1e
0x20 0x20 0x62 0x1e
0x28 0x20 0xe0 0x1e
0x28 0x20 0x20 0x1e
0x28 0x20 0x60 0x1e
0x30 0x20 0xe2 0x1e
0x30 0x20 0x22 0x1e
0x30 0x20 0x62 0x1e
0x38 0x20 0xe0 0x1e
0x38 0x20 0x20 0x1e
0x38 0x20 0x60 0x1e
# FP16: fcmp h1, h2
# CHECK: fcmp s1, s2
# CHECK: fcmp d1, d2
# FP16: fcmp h1, #0.0
# CHECK: fcmp s1, #0.0
# CHECK: fcmp d1, #0.0
# FP16: fcmpe h1, h2
# CHECK: fcmpe s1, s2
# CHECK: fcmpe d1, d2
# FP16: fcmpe h1, #0.0
# CHECK: fcmpe s1, #0.0
# CHECK: fcmpe d1, #0.0
#-----------------------------------------------------------------------------
# Floating-point conditional select
#-----------------------------------------------------------------------------
# One triple of encodings varying only the third byte, with expectations for
# the h-, s- and d-register forms of fcsel under the eq condition.
0x41 0x0c 0xe3 0x1e
0x41 0x0c 0x23 0x1e
0x41 0x0c 0x63 0x1e
# FP16: fcsel h1, h2, h3, eq
# CHECK: fcsel s1, s2, s3, eq
# CHECK: fcsel d1, d2, d3, eq
#-----------------------------------------------------------------------------
# Floating-point convert
#-----------------------------------------------------------------------------
# Six fcvt encodings, one for every ordered pair of distinct h/s/d registers.
# All six expectations use the base prefix, including those with an
# h-register operand, so both invocations at the top of this file verify them.
0x41 0xc0 0x63 0x1e
0x41 0x40 0x62 0x1e
0x41 0xc0 0xe2 0x1e
0x41 0x40 0xe2 0x1e
0x41 0xc0 0x22 0x1e
0x41 0xc0 0x23 0x1e
# CHECK: fcvt h1, d2
# CHECK: fcvt s1, d2
# CHECK: fcvt d1, h2
# CHECK: fcvt s1, h2
# CHECK: fcvt d1, s2
# CHECK: fcvt h1, s2
# The eight encodings below (third byte 0x44 then 0x04, fourth byte 0x1e or
# 0x9e, second byte 0x00 or 0x04) are not followed by any expectation line
# before the next section banner, so this file pins nothing about how they
# decode.
# NOTE(review): confirm that leaving these vectors unchecked is intentional.
0x41 0x00 0x44 0x1e
0x41 0x04 0x44 0x1e
0x41 0x00 0x44 0x9e
0x41 0x04 0x44 0x9e
0x41 0x00 0x04 0x1e
0x41 0x04 0x04 0x1e
0x41 0x00 0x04 0x9e
0x41 0x04 0x04 0x9e
#-----------------------------------------------------------------------------
# Floating-point move
#-----------------------------------------------------------------------------
# Moves between general-purpose and floating-point registers, as three pairs
# (one per direction): h<->w, s<->w, then d<->x.  Within a pair only the
# third byte changes (0xe7/0xe6, 0x27/0x26, 0x67/0x66); the d<->x pair also
# has fourth byte 0x9e instead of 0x1e.  Only the h<->w pair uses the
# half-precision prefix.
0x41 0x00 0xe7 0x1e
0x41 0x00 0xe6 0x1e
0x41 0x00 0x27 0x1e
0x41 0x00 0x26 0x1e
0x41 0x00 0x67 0x9e
0x41 0x00 0x66 0x9e
# FP16: fmov h1, w2
# FP16: fmov w1, h2
# CHECK: fmov s1, w2
# CHECK: fmov w1, s2
# CHECK: fmov d1, x2
# CHECK: fmov x1, d2
# Immediate moves: five encodings, five expectations.  The first three are
# the h/s/d forms of #0.125; the last two are d-register only and differ in
# the third byte (0x7b vs 0x6b), expected as #-0.484375 and #0.484375.
0x01 0x10 0xe8 0x1e
0x01 0x10 0x28 0x1e
0x01 0x10 0x68 0x1e
0x01 0xf0 0x7b 0x1e
0x01 0xf0 0x6b 0x1e
# FP16: fmov h1, #0.12500000
# CHECK: fmov s1, #0.12500000
# CHECK: fmov d1, #0.12500000
# CHECK: fmov d1, #-0.48437500
# CHECK: fmov d1, #0.48437500
# Register-to-register moves: one triple, h/s/d.
0x41 0x40 0xe0 0x1e
0x41 0x40 0x20 0x1e
0x41 0x40 0x60 0x1e
# FP16: fmov h1, h2
# CHECK: fmov s1, s2
# CHECK: fmov d1, d2
#-----------------------------------------------------------------------------
# Floating-point round to integral
#-----------------------------------------------------------------------------
# Seven groups, one per frint* variant (a, i, m, n, p, x, z).  Each group is
# a triple of encodings varying only the high nibble of the third byte
# (0xe_/0x2_/0x6_), followed by the h-, s- and d-register expectations in
# that order.  The variants are told apart by the second byte (0x40 or 0xc0)
# together with the low nibble of the third byte (4 through 7).
0x41 0x40 0xe6 0x1e
0x41 0x40 0x26 0x1e
0x41 0x40 0x66 0x1e
# FP16: frinta h1, h2
# CHECK: frinta s1, s2
# CHECK: frinta d1, d2
0x41 0xc0 0xe7 0x1e
0x41 0xc0 0x27 0x1e
0x41 0xc0 0x67 0x1e
# FP16: frinti h1, h2
# CHECK: frinti s1, s2
# CHECK: frinti d1, d2
0x41 0x40 0xe5 0x1e
0x41 0x40 0x25 0x1e
0x41 0x40 0x65 0x1e
# FP16: frintm h1, h2
# CHECK: frintm s1, s2
# CHECK: frintm d1, d2
0x41 0x40 0xe4 0x1e
0x41 0x40 0x24 0x1e
0x41 0x40 0x64 0x1e
# FP16: frintn h1, h2
# CHECK: frintn s1, s2
# CHECK: frintn d1, d2
0x41 0xc0 0xe4 0x1e
0x41 0xc0 0x24 0x1e
0x41 0xc0 0x64 0x1e
# FP16: frintp h1, h2
# CHECK: frintp s1, s2
# CHECK: frintp d1, d2
0x41 0x40 0xe7 0x1e
0x41 0x40 0x27 0x1e
0x41 0x40 0x67 0x1e
# FP16: frintx h1, h2
# CHECK: frintx s1, s2
# CHECK: frintx d1, d2
0x41 0xc0 0xe5 0x1e
0x41 0xc0 0x25 0x1e
0x41 0xc0 0x65 0x1e
# FP16: frintz h1, h2
# CHECK: frintz s1, s2
# CHECK: frintz d1, d2
# Two encodings with d-register expectations (cmhs, cmtst).  They follow the
# frintz group directly, with no section banner of their own.
# NOTE(review): consider whether these trailing vectors deserve a banner.
0x00 0x3c 0xe0 0x7e
0x00 0x8c 0xe0 0x5e
# CHECK: cmhs d0, d0, d0
# CHECK: cmtst d0, d0, d0
# Two encodings that differ only in the third byte (0xaf / 0xae); their
# expectations give the two transfer directions between x0 and v0[1].
0x00 0x00 0xaf 0x9e
0x00 0x00 0xae 0x9e
# CHECK: fmov.d v0[1], x0
# CHECK: fmov.d x0, v0[1]