Revision | 14e4c1e2355473ccb2939afc69ac8f25de103b92 (tree) |
---|---|
Time | 2018-02-09 00:54:08 |
Author | Richard Henderson <richard.henderson@lina...> |
Committer | Richard Henderson |
tcg/aarch64: Add vector operations
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
@@ -31,13 +31,22 @@ typedef enum { | ||
31 | 31 | TCG_REG_SP = 31, |
32 | 32 | TCG_REG_XZR = 31, |
33 | 33 | |
34 | + TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, | |
35 | + TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, | |
36 | + TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11, | |
37 | + TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, | |
38 | + TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, | |
39 | + TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, | |
40 | + TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, | |
41 | + TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, | |
42 | + | |
34 | 43 | /* Aliases. */ |
35 | 44 | TCG_REG_FP = TCG_REG_X29, |
36 | 45 | TCG_REG_LR = TCG_REG_X30, |
37 | 46 | TCG_AREG0 = TCG_REG_X19, |
38 | 47 | } TCGReg; |
39 | 48 | |
40 | -#define TCG_TARGET_NB_REGS 32 | |
49 | +#define TCG_TARGET_NB_REGS 64 | |
41 | 50 | |
42 | 51 | /* used for function call generation */ |
43 | 52 | #define TCG_REG_CALL_STACK TCG_REG_SP |
@@ -113,6 +122,20 @@ typedef enum { | ||
113 | 122 | #define TCG_TARGET_HAS_mulsh_i64 1 |
114 | 123 | #define TCG_TARGET_HAS_direct_jump 1 |
115 | 124 | |
125 | +#define TCG_TARGET_HAS_v64 1 | |
126 | +#define TCG_TARGET_HAS_v128 1 | |
127 | +#define TCG_TARGET_HAS_v256 0 | |
128 | + | |
129 | +#define TCG_TARGET_HAS_andc_vec 1 | |
130 | +#define TCG_TARGET_HAS_orc_vec 1 | |
131 | +#define TCG_TARGET_HAS_not_vec 1 | |
132 | +#define TCG_TARGET_HAS_neg_vec 1 | |
133 | +#define TCG_TARGET_HAS_shi_vec 1 | |
134 | +#define TCG_TARGET_HAS_shs_vec 0 | |
135 | +#define TCG_TARGET_HAS_shv_vec 0 | |
136 | +#define TCG_TARGET_HAS_cmp_vec 1 | |
137 | +#define TCG_TARGET_HAS_mul_vec 1 | |
138 | + | |
116 | 139 | #define TCG_TARGET_DEFAULT_MO (0) |
117 | 140 | |
118 | 141 | static inline void flush_icache_range(uintptr_t start, uintptr_t stop) |
@@ -20,10 +20,15 @@ QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1); | ||
20 | 20 | |
21 | 21 | #ifdef CONFIG_DEBUG_TCG |
22 | 22 | static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { |
23 | - "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7", | |
24 | - "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15", | |
25 | - "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23", | |
26 | - "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp", | |
23 | + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", | |
24 | + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", | |
25 | + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", | |
26 | + "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp", | |
27 | + /* Vector registers: one name per TCG_REG_V0 .. TCG_REG_V31, in order. */ | |
28 | + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", | |
29 | + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", | |
30 | + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", | |
31 | + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", | |
27 | 32 | }; |
28 | 33 | #endif /* CONFIG_DEBUG_TCG */ |
29 | 34 |
@@ -43,6 +48,14 @@ static const int tcg_target_reg_alloc_order[] = { | ||
43 | 48 | /* X19 reserved for AREG0 */ |
44 | 49 | /* X29 reserved as fp */ |
45 | 50 | /* X30 reserved as temporary */ |
51 | + | |
52 | + TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, | |
53 | + TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, | |
54 | + /* V8 - V15 are call-saved, and skipped. */ | |
55 | + TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, | |
56 | + TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, | |
57 | + TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, | |
58 | + TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, | |
46 | 59 | }; |
47 | 60 | |
48 | 61 | static const int tcg_target_call_iarg_regs[8] = { |
@@ -54,6 +67,7 @@ static const int tcg_target_call_oarg_regs[1] = { | ||
54 | 67 | }; |
55 | 68 | |
56 | 69 | #define TCG_REG_TMP TCG_REG_X30 |
70 | +#define TCG_VEC_TMP TCG_REG_V31 | |
57 | 71 | |
58 | 72 | #ifndef CONFIG_SOFTMMU |
59 | 73 | /* Note that XZR cannot be encoded in the address base register slot, |
@@ -119,9 +133,13 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, | ||
119 | 133 | const char *ct_str, TCGType type) |
120 | 134 | { |
121 | 135 | switch (*ct_str++) { |
122 | - case 'r': | |
136 | + case 'r': /* general registers */ | |
123 | 137 | ct->ct |= TCG_CT_REG; |
124 | - ct->u.regs = 0xffffffffu; | |
138 | + ct->u.regs |= 0xffffffffu; | |
139 | + break; | |
140 | + case 'w': /* advsimd registers */ | |
141 | + ct->ct |= TCG_CT_REG; | |
142 | + ct->u.regs |= 0xffffffff00000000ull; | |
125 | 143 | break; |
126 | 144 | case 'l': /* qemu_ld / qemu_st address, data_reg */ |
127 | 145 | ct->ct |= TCG_CT_REG; |
@@ -153,11 +171,13 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, | ||
153 | 171 | return ct_str; |
154 | 172 | } |
155 | 173 | |
174 | +/* Match a constant valid for addition (12-bit, optionally shifted). */ | |
156 | 175 | static inline bool is_aimm(uint64_t val) |
157 | 176 | { |
158 | 177 | return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; |
159 | 178 | } |
160 | 179 | |
180 | +/* Match a constant valid for logical operations. */ | |
161 | 181 | static inline bool is_limm(uint64_t val) |
162 | 182 | { |
163 | 183 | /* Taking a simplified view of the logical immediates for now, ignoring |
@@ -178,6 +198,106 @@ static inline bool is_limm(uint64_t val) | ||
178 | 198 | return (val & (val - 1)) == 0; |
179 | 199 | } |
180 | 200 | |
201 | +/* Match a constant that is valid for vectors. */ | |
202 | +static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8) | |
203 | +{ | |
204 | + int i; | |
205 | + | |
206 | + *op = 0; | |
207 | + /* Match replication across 8 bits. */ | |
208 | + if (v64 == dup_const(MO_8, v64)) { | |
209 | + *cmode = 0xe; | |
210 | + *imm8 = v64 & 0xff; | |
211 | + return true; | |
212 | + } | |
213 | + /* Match replication across 16 bits. */ | |
214 | + if (v64 == dup_const(MO_16, v64)) { | |
215 | + uint16_t v16 = v64; | |
216 | + | |
217 | + if (v16 == (v16 & 0xff)) { | |
218 | + *cmode = 0x8; | |
219 | + *imm8 = v16 & 0xff; | |
220 | + return true; | |
221 | + } else if (v16 == (v16 & 0xff00)) { | |
222 | + *cmode = 0xa; | |
223 | + *imm8 = v16 >> 8; | |
224 | + return true; | |
225 | + } | |
226 | + } | |
227 | + /* Match replication across 32 bits. */ | |
228 | + if (v64 == dup_const(MO_32, v64)) { | |
229 | + uint32_t v32 = v64; | |
230 | + | |
231 | + if (v32 == (v32 & 0xff)) { | |
232 | + *cmode = 0x0; | |
233 | + *imm8 = v32 & 0xff; | |
234 | + return true; | |
235 | + } else if (v32 == (v32 & 0xff00)) { | |
236 | + *cmode = 0x2; | |
237 | + *imm8 = (v32 >> 8) & 0xff; | |
238 | + return true; | |
239 | + } else if (v32 == (v32 & 0xff0000)) { | |
240 | + *cmode = 0x4; | |
241 | + *imm8 = (v32 >> 16) & 0xff; | |
242 | + return true; | |
243 | + } else if (v32 == (v32 & 0xff000000)) { | |
244 | + *cmode = 0x6; | |
245 | + *imm8 = v32 >> 24; | |
246 | + return true; | |
247 | + } else if ((v32 & 0xffff00ff) == 0xff) { | |
248 | + *cmode = 0xc; | |
249 | + *imm8 = (v32 >> 8) & 0xff; | |
250 | + return true; | |
251 | + } else if ((v32 & 0xff00ffff) == 0xffff) { | |
252 | + *cmode = 0xd; | |
253 | + *imm8 = (v32 >> 16) & 0xff; | |
254 | + return true; | |
255 | + } | |
256 | + /* Match forms of a float32. */ | |
257 | + if (extract32(v32, 0, 19) == 0 | |
258 | + && (extract32(v32, 25, 6) == 0x20 | |
259 | + || extract32(v32, 25, 6) == 0x1f)) { | |
260 | + *cmode = 0xf; | |
261 | + *imm8 = (extract32(v32, 31, 1) << 7) | |
262 | + | (extract32(v32, 25, 1) << 6) | |
263 | + | extract32(v32, 19, 6); | |
264 | + return true; | |
265 | + } | |
266 | + } | |
267 | + /* Match forms of a float64. */ | |
268 | + if (extract64(v64, 0, 48) == 0 | |
269 | + && (extract64(v64, 54, 9) == 0x100 | |
270 | + || extract64(v64, 54, 9) == 0x0ff)) { | |
271 | + *cmode = 0xf; | |
272 | + *op = 1; | |
273 | + *imm8 = (extract64(v64, 63, 1) << 7) | |
274 | + | (extract64(v64, 54, 1) << 6) | |
275 | + | extract64(v64, 48, 6); | |
276 | + return true; | |
277 | + } | |
278 | + /* Match bytes of 0x00 and 0xff. */ | |
279 | + for (i = 0; i < 64; i += 8) { | |
280 | + uint64_t byte = extract64(v64, i, 8); | |
281 | + if (byte != 0 && byte != 0xff) { | |
282 | + break; | |
283 | + } | |
284 | + } | |
285 | + if (i == 64) { | |
286 | + *cmode = 0xe; | |
287 | + *op = 1; | |
288 | + *imm8 = (extract64(v64, 0, 1) << 0) | |
289 | + | (extract64(v64, 8, 1) << 1) | |
290 | + | (extract64(v64, 16, 1) << 2) | |
291 | + | (extract64(v64, 24, 1) << 3) | |
292 | + | (extract64(v64, 32, 1) << 4) | |
293 | + | (extract64(v64, 40, 1) << 5) | |
294 | + | (extract64(v64, 48, 1) << 6) | |
295 | + | (extract64(v64, 56, 1) << 7); | |
296 | + return true; | |
297 | + } | |
298 | + return false; | |
299 | +} | |
300 | + | |
181 | 301 | static int tcg_target_const_match(tcg_target_long val, TCGType type, |
182 | 302 | const TCGArgConstraint *arg_ct) |
183 | 303 | { |
@@ -271,6 +391,9 @@ typedef enum { | ||
271 | 391 | |
272 | 392 | /* Load literal for loading the address at pc-relative offset */ |
273 | 393 | I3305_LDR = 0x58000000, |
394 | + I3305_LDR_v64 = 0x5c000000, | |
395 | + I3305_LDR_v128 = 0x9c000000, | |
396 | + | |
274 | 397 | /* Load/store register. Described here as 3.3.12, but the helper |
275 | 398 | that emits them can transform to 3.3.10 or 3.3.13. */ |
276 | 399 | I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, |
@@ -290,6 +413,15 @@ typedef enum { | ||
290 | 413 | I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, |
291 | 414 | I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, |
292 | 415 | |
416 | + I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, | |
417 | + I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, | |
418 | + | |
419 | + I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, | |
420 | + I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, | |
421 | + | |
422 | + I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, | |
423 | + I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, | |
424 | + | |
293 | 425 | I3312_TO_I3310 = 0x00200800, |
294 | 426 | I3312_TO_I3313 = 0x01000000, |
295 | 427 |
@@ -374,8 +506,48 @@ typedef enum { | ||
374 | 506 | I3510_EON = 0x4a200000, |
375 | 507 | I3510_ANDS = 0x6a000000, |
376 | 508 | |
377 | - NOP = 0xd503201f, | |
509 | + /* AdvSIMD copy */ | |
510 | + I3605_DUP = 0x0e000400, | |
511 | + I3605_INS = 0x4e001c00, | |
512 | + I3605_UMOV = 0x0e003c00, | |
513 | + | |
514 | + /* AdvSIMD modified immediate */ | |
515 | + I3606_MOVI = 0x0f000400, | |
516 | + | |
517 | + /* AdvSIMD shift by immediate */ | |
518 | + I3614_SSHR = 0x0f000400, | |
519 | + I3614_SSRA = 0x0f001400, | |
520 | + I3614_SHL = 0x0f005400, | |
521 | + I3614_USHR = 0x2f000400, | |
522 | + I3614_USRA = 0x2f001400, | |
523 | + | |
524 | + /* AdvSIMD three same. */ | |
525 | + I3616_ADD = 0x0e208400, | |
526 | + I3616_AND = 0x0e201c00, | |
527 | + I3616_BIC = 0x0e601c00, | |
528 | + I3616_EOR = 0x2e201c00, | |
529 | + I3616_MUL = 0x0e209c00, | |
530 | + I3616_ORR = 0x0ea01c00, | |
531 | + I3616_ORN = 0x0ee01c00, | |
532 | + I3616_SUB = 0x2e208400, | |
533 | + I3616_CMGT = 0x0e203400, | |
534 | + I3616_CMGE = 0x0e203c00, | |
535 | + I3616_CMTST = 0x0e208c00, | |
536 | + I3616_CMHI = 0x2e203400, | |
537 | + I3616_CMHS = 0x2e203c00, | |
538 | + I3616_CMEQ = 0x2e208c00, | |
539 | + | |
540 | + /* AdvSIMD two-reg misc. The CMxx0 forms compare a vector against zero. */ | |
541 | + I3617_CMGT0 = 0x0e208800, | |
542 | + I3617_CMEQ0 = 0x0e209800, | |
543 | + I3617_CMLT0 = 0x0e20a800, | |
544 | + I3617_CMGE0 = 0x2e208800, | |
545 | + I3617_CMLE0 = 0x2e209800, | |
546 | + I3617_NOT = 0x2e205800, | |
547 | + I3617_NEG = 0x2e20b800, | |
548 | + | |
378 | 549 | /* System instructions. */ |
550 | + NOP = 0xd503201f, | |
379 | 551 | DMB_ISH = 0xd50338bf, |
380 | 552 | DMB_LD = 0x00000100, |
381 | 553 | DMB_ST = 0x00000200, |
@@ -520,26 +692,64 @@ static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, | ||
520 | 692 | tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); |
521 | 693 | } |
522 | 694 | |
695 | +static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q, | |
696 | + TCGReg rd, TCGReg rn, int dst_idx, int src_idx) | |
697 | +{ | |
698 | + /* Note that bit 11 set means general register input. Therefore | |
699 | + we can handle both register sets with one function. */ | |
700 | + tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11) | |
701 | + | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5); | |
702 | +} | |
703 | + | |
704 | +static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q, | |
705 | + TCGReg rd, bool op, int cmode, uint8_t imm8) | |
706 | +{ | |
707 | + tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f) | |
708 | + | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5); | |
709 | +} | |
710 | + | |
711 | +static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q, | |
712 | + TCGReg rd, TCGReg rn, unsigned immhb) | |
713 | +{ | |
714 | + tcg_out32(s, insn | q << 30 | immhb << 16 | |
715 | + | (rn & 0x1f) << 5 | (rd & 0x1f)); | |
716 | +} | |
717 | + | |
718 | +static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q, | |
719 | + unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) | |
720 | +{ | |
721 | + tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16 | |
722 | + | (rn & 0x1f) << 5 | (rd & 0x1f)); | |
723 | +} | |
724 | + | |
725 | +static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q, | |
726 | + unsigned size, TCGReg rd, TCGReg rn) | |
727 | +{ | |
728 | + tcg_out32(s, insn | q << 30 | (size << 22) | |
729 | + | (rn & 0x1f) << 5 | (rd & 0x1f)); | |
730 | +} | |
731 | + | |
523 | 732 | static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, |
524 | 733 | TCGReg rd, TCGReg base, TCGType ext, |
525 | 734 | TCGReg regoff) |
526 | 735 | { |
527 | 736 | /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ |
528 | 737 | tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | |
529 | - 0x4000 | ext << 13 | base << 5 | rd); | |
738 | + 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); | |
530 | 739 | } |
531 | 740 | |
532 | 741 | static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, |
533 | 742 | TCGReg rd, TCGReg rn, intptr_t offset) |
534 | 743 | { |
535 | - tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd); | |
744 | + tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); | |
536 | 745 | } |
537 | 746 | |
538 | 747 | static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, |
539 | 748 | TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) |
540 | 749 | { |
541 | 750 | /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ |
542 | - tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd); | |
751 | + tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | |
752 | + | rn << 5 | (rd & 0x1f)); | |
543 | 753 | } |
544 | 754 | |
545 | 755 | /* Register to register move using ORR (shifted register with no shift). */ |
@@ -585,6 +795,22 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, | ||
585 | 795 | tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); |
586 | 796 | } |
587 | 797 | |
798 | +static void tcg_out_dupi_vec(TCGContext *s, TCGType type, | |
799 | + TCGReg rd, uint64_t v64) | |
800 | +{ | |
801 | + int op, cmode, imm8; | |
802 | + | |
803 | + if (is_fimm(v64, &op, &cmode, &imm8)) { | |
804 | + tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8); | |
805 | + } else if (type == TCG_TYPE_V128) { | |
806 | + new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); | |
807 | + tcg_out_insn(s, 3305, LDR_v128, 0, rd); | |
808 | + } else { | |
809 | + new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); | |
810 | + tcg_out_insn(s, 3305, LDR_v64, 0, rd); | |
811 | + } | |
812 | +} | |
813 | + | |
588 | 814 | static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, |
589 | 815 | tcg_target_long value) |
590 | 816 | { |
@@ -594,6 +820,22 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, | ||
594 | 820 | int s0, s1; |
595 | 821 | AArch64Insn opc; |
596 | 822 | |
823 | + switch (type) { | |
824 | + case TCG_TYPE_I32: | |
825 | + case TCG_TYPE_I64: | |
826 | + tcg_debug_assert(rd < 32); | |
827 | + break; | |
828 | + | |
829 | + case TCG_TYPE_V64: | |
830 | + case TCG_TYPE_V128: | |
831 | + tcg_debug_assert(rd >= 32); | |
832 | + tcg_out_dupi_vec(s, type, rd, value); | |
833 | + return; | |
834 | + | |
835 | + default: | |
836 | + g_assert_not_reached(); | |
837 | + } | |
838 | + | |
597 | 839 | /* For 32-bit values, discard potential garbage in value. For 64-bit |
598 | 840 | values within [2**31, 2**32-1], we can create smaller sequences by |
599 | 841 | interpreting this as a negative 32-bit number, while ensuring that |
@@ -669,15 +911,13 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, | ||
669 | 911 | /* Define something more legible for general use. */ |
670 | 912 | #define tcg_out_ldst_r tcg_out_insn_3310 |
671 | 913 | |
672 | -static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, | |
673 | - TCGReg rd, TCGReg rn, intptr_t offset) | |
914 | +static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, | |
915 | + TCGReg rn, intptr_t offset, int lgsize) | |
674 | 916 | { |
675 | - TCGMemOp size = (uint32_t)insn >> 30; | |
676 | - | |
677 | 917 | /* If the offset is naturally aligned and in range, then we can |
678 | 918 | use the scaled uimm12 encoding */ |
679 | - if (offset >= 0 && !(offset & ((1 << size) - 1))) { | |
680 | - uintptr_t scaled_uimm = offset >> size; | |
919 | + if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { | |
920 | + uintptr_t scaled_uimm = offset >> lgsize; | |
681 | 921 | if (scaled_uimm <= 0xfff) { |
682 | 922 | tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); |
683 | 923 | return; |
@@ -695,32 +935,102 @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, | ||
695 | 935 | tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); |
696 | 936 | } |
697 | 937 | |
698 | -static inline void tcg_out_mov(TCGContext *s, | |
699 | - TCGType type, TCGReg ret, TCGReg arg) | |
938 | +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) | |
700 | 939 | { |
701 | - if (ret != arg) { | |
702 | - tcg_out_movr(s, type, ret, arg); | |
940 | + if (ret == arg) { | |
941 | + return; | |
942 | + } | |
943 | + switch (type) { | |
944 | + case TCG_TYPE_I32: | |
945 | + case TCG_TYPE_I64: | |
946 | + if (ret < 32 && arg < 32) { | |
947 | + tcg_out_movr(s, type, ret, arg); | |
948 | + break; | |
949 | + } else if (ret < 32) { | |
950 | + tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0); | |
951 | + break; | |
952 | + } else if (arg < 32) { | |
953 | + tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0); | |
954 | + break; | |
955 | + } | |
956 | + /* FALLTHRU */ | |
957 | + | |
958 | + case TCG_TYPE_V64: | |
959 | + tcg_debug_assert(ret >= 32 && arg >= 32); | |
960 | + tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg); | |
961 | + break; | |
962 | + case TCG_TYPE_V128: | |
963 | + tcg_debug_assert(ret >= 32 && arg >= 32); | |
964 | + tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg); | |
965 | + break; | |
966 | + | |
967 | + default: | |
968 | + g_assert_not_reached(); | |
703 | 969 | } |
704 | 970 | } |
705 | 971 | |
706 | -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, | |
707 | - TCGReg arg1, intptr_t arg2) | |
972 | +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | |
973 | + TCGReg base, intptr_t ofs) | |
708 | 974 | { |
709 | - tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX, | |
710 | - arg, arg1, arg2); | |
975 | + AArch64Insn insn; | |
976 | + int lgsz; | |
977 | + | |
978 | + switch (type) { | |
979 | + case TCG_TYPE_I32: | |
980 | + insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS); | |
981 | + lgsz = 2; | |
982 | + break; | |
983 | + case TCG_TYPE_I64: | |
984 | + insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD); | |
985 | + lgsz = 3; | |
986 | + break; | |
987 | + case TCG_TYPE_V64: | |
988 | + insn = I3312_LDRVD; | |
989 | + lgsz = 3; | |
990 | + break; | |
991 | + case TCG_TYPE_V128: | |
992 | + insn = I3312_LDRVQ; | |
993 | + lgsz = 4; | |
994 | + break; | |
995 | + default: | |
996 | + g_assert_not_reached(); | |
997 | + } | |
998 | + tcg_out_ldst(s, insn, ret, base, ofs, lgsz); | |
711 | 999 | } |
712 | 1000 | |
713 | -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | |
714 | - TCGReg arg1, intptr_t arg2) | |
1001 | +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src, | |
1002 | + TCGReg base, intptr_t ofs) | |
715 | 1003 | { |
716 | - tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX, | |
717 | - arg, arg1, arg2); | |
1004 | + AArch64Insn insn; | |
1005 | + int lgsz; | |
1006 | + | |
1007 | + switch (type) { | |
1008 | + case TCG_TYPE_I32: | |
1009 | + insn = (src < 32 ? I3312_STRW : I3312_STRVS); | |
1010 | + lgsz = 2; | |
1011 | + break; | |
1012 | + case TCG_TYPE_I64: | |
1013 | + insn = (src < 32 ? I3312_STRX : I3312_STRVD); | |
1014 | + lgsz = 3; | |
1015 | + break; | |
1016 | + case TCG_TYPE_V64: | |
1017 | + insn = I3312_STRVD; | |
1018 | + lgsz = 3; | |
1019 | + break; | |
1020 | + case TCG_TYPE_V128: | |
1021 | + insn = I3312_STRVQ; | |
1022 | + lgsz = 4; | |
1023 | + break; | |
1024 | + default: | |
1025 | + g_assert_not_reached(); | |
1026 | + } | |
1027 | + tcg_out_ldst(s, insn, src, base, ofs, lgsz); | |
718 | 1028 | } |
719 | 1029 | |
720 | 1030 | static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, |
721 | 1031 | TCGReg base, intptr_t ofs) |
722 | 1032 | { |
723 | - if (val == 0) { | |
1033 | + if (type <= TCG_TYPE_I64 && val == 0) { | |
724 | 1034 | tcg_out_st(s, type, TCG_REG_XZR, base, ofs); |
725 | 1035 | return true; |
726 | 1036 | } |
@@ -1210,14 +1520,15 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc, | ||
1210 | 1520 | /* Merge "low bits" from tlb offset, load the tlb comparator into X0. |
1211 | 1521 | X0 = load [X2 + (tlb_offset & 0x000fff)] */ |
1212 | 1522 | tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX, |
1213 | - TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff); | |
1523 | + TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff, | |
1524 | + TARGET_LONG_BITS == 32 ? 2 : 3); | |
1214 | 1525 | |
1215 | 1526 | /* Load the tlb addend. Do that early to avoid stalling. |
1216 | 1527 | X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */ |
1217 | 1528 | tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2, |
1218 | 1529 | (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) - |
1219 | 1530 | (is_read ? offsetof(CPUTLBEntry, addr_read) |
1220 | - : offsetof(CPUTLBEntry, addr_write))); | |
1531 | + : offsetof(CPUTLBEntry, addr_write)), 3); | |
1221 | 1532 | |
1222 | 1533 | /* Perform the address comparison. */ |
1223 | 1534 | tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0); |
@@ -1435,49 +1746,49 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
1435 | 1746 | |
1436 | 1747 | case INDEX_op_ld8u_i32: |
1437 | 1748 | case INDEX_op_ld8u_i64: |
1438 | - tcg_out_ldst(s, I3312_LDRB, a0, a1, a2); | |
1749 | + tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); | |
1439 | 1750 | break; |
1440 | 1751 | case INDEX_op_ld8s_i32: |
1441 | - tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2); | |
1752 | + tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); | |
1442 | 1753 | break; |
1443 | 1754 | case INDEX_op_ld8s_i64: |
1444 | - tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2); | |
1755 | + tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); | |
1445 | 1756 | break; |
1446 | 1757 | case INDEX_op_ld16u_i32: |
1447 | 1758 | case INDEX_op_ld16u_i64: |
1448 | - tcg_out_ldst(s, I3312_LDRH, a0, a1, a2); | |
1759 | + tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); | |
1449 | 1760 | break; |
1450 | 1761 | case INDEX_op_ld16s_i32: |
1451 | - tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2); | |
1762 | + tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); | |
1452 | 1763 | break; |
1453 | 1764 | case INDEX_op_ld16s_i64: |
1454 | - tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2); | |
1765 | + tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); | |
1455 | 1766 | break; |
1456 | 1767 | case INDEX_op_ld_i32: |
1457 | 1768 | case INDEX_op_ld32u_i64: |
1458 | - tcg_out_ldst(s, I3312_LDRW, a0, a1, a2); | |
1769 | + tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); | |
1459 | 1770 | break; |
1460 | 1771 | case INDEX_op_ld32s_i64: |
1461 | - tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2); | |
1772 | + tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); | |
1462 | 1773 | break; |
1463 | 1774 | case INDEX_op_ld_i64: |
1464 | - tcg_out_ldst(s, I3312_LDRX, a0, a1, a2); | |
1775 | + tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); | |
1465 | 1776 | break; |
1466 | 1777 | |
1467 | 1778 | case INDEX_op_st8_i32: |
1468 | 1779 | case INDEX_op_st8_i64: |
1469 | - tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2); | |
1780 | + tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0); | |
1470 | 1781 | break; |
1471 | 1782 | case INDEX_op_st16_i32: |
1472 | 1783 | case INDEX_op_st16_i64: |
1473 | - tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2); | |
1784 | + tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1); | |
1474 | 1785 | break; |
1475 | 1786 | case INDEX_op_st_i32: |
1476 | 1787 | case INDEX_op_st32_i64: |
1477 | - tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2); | |
1788 | + tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2); | |
1478 | 1789 | break; |
1479 | 1790 | case INDEX_op_st_i64: |
1480 | - tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2); | |
1791 | + tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3); | |
1481 | 1792 | break; |
1482 | 1793 | |
1483 | 1794 | case INDEX_op_add_i32: |
@@ -1776,25 +2087,176 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
1776 | 2087 | |
1777 | 2088 | case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ |
1778 | 2089 | case INDEX_op_mov_i64: |
2090 | + case INDEX_op_mov_vec: | |
1779 | 2091 | case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ |
1780 | 2092 | case INDEX_op_movi_i64: |
2093 | + case INDEX_op_dupi_vec: | |
1781 | 2094 | case INDEX_op_call: /* Always emitted via tcg_out_call. */ |
1782 | 2095 | default: |
1783 | - tcg_abort(); | |
2096 | + g_assert_not_reached(); | |
1784 | 2097 | } |
1785 | 2098 | |
1786 | 2099 | #undef REG0 |
1787 | 2100 | } |
1788 | 2101 | |
2102 | +static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | |
2103 | + unsigned vecl, unsigned vece, | |
2104 | + const TCGArg *args, const int *const_args) | |
2105 | +{ | |
2106 | + static const AArch64Insn cmp_insn[16] = { | |
2107 | + [TCG_COND_EQ] = I3616_CMEQ, | |
2108 | + [TCG_COND_GT] = I3616_CMGT, | |
2109 | + [TCG_COND_GE] = I3616_CMGE, | |
2110 | + [TCG_COND_GTU] = I3616_CMHI, | |
2111 | + [TCG_COND_GEU] = I3616_CMHS, | |
2112 | + }; | |
2113 | + static const AArch64Insn cmp0_insn[16] = { | |
2114 | + [TCG_COND_EQ] = I3617_CMEQ0, | |
2115 | + [TCG_COND_GT] = I3617_CMGT0, | |
2116 | + [TCG_COND_GE] = I3617_CMGE0, | |
2117 | + [TCG_COND_LT] = I3617_CMLT0, | |
2118 | + [TCG_COND_LE] = I3617_CMLE0, | |
2119 | + }; | |
2120 | + | |
2121 | + TCGType type = vecl + TCG_TYPE_V64; | |
2122 | + unsigned is_q = vecl; | |
2123 | + TCGArg a0, a1, a2; | |
2124 | + | |
2125 | + a0 = args[0]; | |
2126 | + a1 = args[1]; | |
2127 | + a2 = args[2]; | |
2128 | + | |
2129 | + switch (opc) { | |
2130 | + case INDEX_op_ld_vec: | |
2131 | + tcg_out_ld(s, type, a0, a1, a2); | |
2132 | + break; | |
2133 | + case INDEX_op_st_vec: | |
2134 | + tcg_out_st(s, type, a0, a1, a2); | |
2135 | + break; | |
2136 | + case INDEX_op_add_vec: | |
2137 | + tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); | |
2138 | + break; | |
2139 | + case INDEX_op_sub_vec: | |
2140 | + tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); | |
2141 | + break; | |
2142 | + case INDEX_op_mul_vec: | |
2143 | + tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); | |
2144 | + break; | |
2145 | + case INDEX_op_neg_vec: | |
2146 | + tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); | |
2147 | + break; | |
2148 | + case INDEX_op_and_vec: | |
2149 | + tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); | |
2150 | + break; | |
2151 | + case INDEX_op_or_vec: | |
2152 | + tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); | |
2153 | + break; | |
2154 | + case INDEX_op_xor_vec: | |
2155 | + tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); | |
2156 | + break; | |
2157 | + case INDEX_op_andc_vec: | |
2158 | + tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); | |
2159 | + break; | |
2160 | + case INDEX_op_orc_vec: | |
2161 | + tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); | |
2162 | + break; | |
2163 | + case INDEX_op_not_vec: | |
2164 | + tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); | |
2165 | + break; | |
2166 | + case INDEX_op_dup_vec: | |
2167 | + tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0); | |
2168 | + break; | |
2169 | + case INDEX_op_shli_vec: | |
2170 | + tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); | |
2171 | + break; | |
2172 | + case INDEX_op_shri_vec: | |
2173 | + tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); | |
2174 | + break; | |
2175 | + case INDEX_op_sari_vec: | |
2176 | + tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); | |
2177 | + break; | |
2178 | + case INDEX_op_cmp_vec: | |
2179 | + { | |
2180 | + TCGCond cond = args[3]; | |
2181 | + AArch64Insn insn; | |
2182 | + | |
2183 | + if (cond == TCG_COND_NE) { | |
2184 | + if (const_args[2]) { | |
2185 | + tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); | |
2186 | + } else { | |
2187 | + tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); | |
2188 | + tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); | |
2189 | + } | |
2190 | + } else { | |
2191 | + if (const_args[2]) { | |
2192 | + insn = cmp0_insn[cond]; | |
2193 | + if (insn) { | |
2194 | + tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); | |
2195 | + break; | |
2196 | + } | |
2197 | + tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0); | |
2198 | + a2 = TCG_VEC_TMP; | |
2199 | + } | |
2200 | + insn = cmp_insn[cond]; | |
2201 | + if (insn == 0) { | |
2202 | + TCGArg t; | |
2203 | + t = a1, a1 = a2, a2 = t; | |
2204 | + cond = tcg_swap_cond(cond); | |
2205 | + insn = cmp_insn[cond]; | |
2206 | + tcg_debug_assert(insn != 0); | |
2207 | + } | |
2208 | + tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); | |
2209 | + } | |
2210 | + } | |
2211 | + break; | |
2212 | + default: | |
2213 | + g_assert_not_reached(); | |
2214 | + } | |
2215 | +} | |
2216 | + | |
2217 | +int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | |
2218 | +{ | |
2219 | + switch (opc) { | |
2220 | + case INDEX_op_add_vec: | |
2221 | + case INDEX_op_sub_vec: | |
2222 | + case INDEX_op_mul_vec: | |
2223 | + case INDEX_op_and_vec: | |
2224 | + case INDEX_op_or_vec: | |
2225 | + case INDEX_op_xor_vec: | |
2226 | + case INDEX_op_andc_vec: | |
2227 | + case INDEX_op_orc_vec: | |
2228 | + case INDEX_op_neg_vec: | |
2229 | + case INDEX_op_not_vec: | |
2230 | + case INDEX_op_cmp_vec: | |
2231 | + case INDEX_op_shli_vec: | |
2232 | + case INDEX_op_shri_vec: | |
2233 | + case INDEX_op_sari_vec: | |
2234 | + return 1; | |
2235 | + | |
2236 | + default: | |
2237 | + return 0; | |
2238 | + } | |
2239 | +} | |
2240 | + | |
2241 | +void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | |
2242 | + TCGArg a0, ...) | |
2243 | +{ | |
2244 | +} | |
2245 | + | |
1789 | 2246 | static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) |
1790 | 2247 | { |
1791 | 2248 | static const TCGTargetOpDef r = { .args_ct_str = { "r" } }; |
1792 | 2249 | static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } }; |
2250 | + static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } }; | |
2251 | + static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } }; | |
2252 | + static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } }; | |
1793 | 2253 | static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } }; |
1794 | 2254 | static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } }; |
1795 | 2255 | static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } }; |
1796 | 2256 | static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } }; |
1797 | 2257 | static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } }; |
2258 | + static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } }; | |
2259 | + static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } }; | |
1798 | 2260 | static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } }; |
1799 | 2261 | static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } }; |
1800 | 2262 | static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } }; |
@@ -1938,6 +2400,29 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
1938 | 2400 | case INDEX_op_sub2_i64: |
1939 | 2401 | return &add2; |
1940 | 2402 | |
2403 | + case INDEX_op_add_vec: | |
2404 | + case INDEX_op_sub_vec: | |
2405 | + case INDEX_op_mul_vec: | |
2406 | + case INDEX_op_and_vec: | |
2407 | + case INDEX_op_or_vec: | |
2408 | + case INDEX_op_xor_vec: | |
2409 | + case INDEX_op_andc_vec: | |
2410 | + case INDEX_op_orc_vec: | |
2411 | + return &w_w_w; | |
2412 | + case INDEX_op_not_vec: | |
2413 | + case INDEX_op_neg_vec: | |
2414 | + case INDEX_op_shli_vec: | |
2415 | + case INDEX_op_shri_vec: | |
2416 | + case INDEX_op_sari_vec: | |
2417 | + return &w_w; | |
2418 | + case INDEX_op_ld_vec: | |
2419 | + case INDEX_op_st_vec: | |
2420 | + return &w_r; | |
2421 | + case INDEX_op_dup_vec: | |
2422 | + return &w_wr; | |
2423 | + case INDEX_op_cmp_vec: | |
2424 | + return &w_w_wZ; | |
2425 | + | |
1941 | 2426 | default: |
1942 | 2427 | return NULL; |
1943 | 2428 | } |
@@ -1947,8 +2432,10 @@ static void tcg_target_init(TCGContext *s) | ||
1947 | 2432 | { |
1948 | 2433 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; |
1949 | 2434 | tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; |
2435 | + tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; | |
2436 | + tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; | |
1950 | 2437 | |
1951 | - tcg_target_call_clobber_regs = 0xfffffffu; | |
2438 | + tcg_target_call_clobber_regs = -1ull; | |
1952 | 2439 | tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); |
1953 | 2440 | tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); |
1954 | 2441 | tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); |
@@ -1960,12 +2447,21 @@ static void tcg_target_init(TCGContext *s) | ||
1960 | 2447 | tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); |
1961 | 2448 | tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); |
1962 | 2449 | tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); |
2450 | + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); | |
2451 | + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); | |
2452 | + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); | |
2453 | + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); | |
2454 | + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); | |
2455 | + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); | |
2456 | + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); | |
2457 | + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15); | |
1963 | 2458 | |
1964 | 2459 | s->reserved_regs = 0; |
1965 | 2460 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); |
1966 | 2461 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); |
1967 | 2462 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); |
1968 | 2463 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ |
2464 | + tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP); | |
1969 | 2465 | } |
1970 | 2466 | |
1971 | 2467 | /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ |
@@ -0,0 +1,3 @@ | ||
1 | +/* Target-specific opcodes for host vector expansion. These will be | |
2 | + emitted by tcg_expand_vec_op. For those familiar with GCC internals, | |
3 | + consider these to be UNSPEC with names. */ |