Skip to content

Commit ee31d80

Browse files
committed
s390x-z17: Implement load indexed address instruction
1 parent 763ace0 commit ee31d80

File tree

7 files changed

+302
-1
lines changed

7 files changed

+302
-1
lines changed

cranelift/codegen/src/isa/s390x/inst.isle

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,6 +1008,22 @@
10081008
(rd WritableReg)
10091009
(mem MemArg))
10101010

1011+
;; Load indexed address: `rd = base + (sign_extend(index + offset) << size)`.
1012+
(LoadIndexedAddr
1013+
(rd WritableReg)
1014+
(base Reg)
1015+
(index Reg)
1016+
(offset i16)
1017+
(size u8))
1018+
1019+
;; Load logical indexed address: `rd = base + (zero_extend(index + offset) << size)`.
1020+
(LoadLogicalIndexedAddr
1021+
(rd WritableReg)
1022+
(base Reg)
1023+
(index Reg)
1024+
(offset u16)
1025+
(size u8))
1026+
10111027
;; Meta-instruction to emit a loop around a sequence of instructions.
10121028
;; This control flow is not visible to the compiler core, in particular
10131029
;; the register allocator. Therefore, instructions in the loop may not
@@ -1741,6 +1757,9 @@
17411757
(decl u32_from_value (u32) Value)
17421758
(extern extractor u32_from_value u32_from_value)
17431759

1760+
;; Extractor that matches a `Value` whose constant (as produced by
;; `u64_from_value`) fits in a `u16`, yielding that constant.
(decl u16_from_value (u16) Value)
1761+
(extern extractor u16_from_value u16_from_value)
1762+
17441763
(decl u8_from_value (u8) Value)
17451764
(extern extractor u8_from_value u8_from_value)
17461765

@@ -1860,6 +1879,9 @@
18601879
(decl pure partial memarg_imm_from_offset_plus_bias (Offset32 u8) SImm20)
18611880
(extern constructor memarg_imm_from_offset_plus_bias memarg_imm_from_offset_plus_bias)
18621881

1882+
;; Partial constructor producing an `SImm20` from an `Offset32` shifted right
;; by the given amount.  The external implementation fails unless the shift is
;; in `1..=4`, the shifted-out low bits are all zero, and the shifted value
;; fits in an `SImm20`.
(decl pure partial memarg_imm_from_shifted_offset (Offset32 u8) SImm20)
1883+
(extern constructor memarg_imm_from_shifted_offset memarg_imm_from_shifted_offset)
1884+
18631885
;; Accessors for `MemFlags`.
18641886

18651887
(decl littleendian () MemFlags)
@@ -1927,6 +1949,21 @@
19271949
(if-let final_offset (memarg_symbol_offset_sum offset sym_offset))
19281950
(memarg_symbol name final_offset flags))
19291951

1952+
;; Fold `base + (extend(index + imm16) << shift)` into a single (logical)
;; load-indexed-address instruction when MIE4 is available, then address
;; memory through the resulting register with a zero displacement.
;;
;; In every rule `x` is the 32-bit index, `z` the 16-bit constant added to it
;; and `y` the 64-bit base.  The helpers take `(base index offset size)`, so
;; `y` must be passed first no matter which side of the outer `iadd` it is on.
;; The inner `iadd` is constrained to `$I32` because the add is folded into
;; the instruction's 32-bit index computation; a narrower add would wrap at a
;; different width.
;;
;; NOTE(review): `shift` matches any `u8` constant, but only shift amounts
;; 0..=4 have an encoding -- confirm a range guard is applied before emission.
(rule 2 (lower_address flags (has_type (and (ty_addr64 _) (mie4_enabled))
          (iadd (ishl (uextend (and (value_type $I32) (iadd x (u16_from_value z))))
                      (u8_from_value shift))
                y))
          (offset32 0))
      (memarg_reg_plus_off (load_logical_indexed_addr y x z shift) 0 0 flags))

(rule 3 (lower_address flags (has_type (and (ty_addr64 _) (mie4_enabled))
          (iadd y
                (ishl (uextend (and (value_type $I32) (iadd x (u16_from_value z))))
                      (u8_from_value shift))))
          (offset32 0))
      (memarg_reg_plus_off (load_logical_indexed_addr y x z shift) 0 0 flags))

(rule 4 (lower_address flags (has_type (and (ty_addr64 _) (mie4_enabled))
          (iadd (ishl (sextend (and (value_type $I32) (iadd x (i16_from_value z))))
                      (u8_from_value shift))
                y))
          (offset32 0))
      (memarg_reg_plus_off (load_indexed_addr y x z shift) 0 0 flags))

(rule 5 (lower_address flags (has_type (and (ty_addr64 _) (mie4_enabled))
          (iadd y
                (ishl (sextend (and (value_type $I32) (iadd x (i16_from_value z))))
                      (u8_from_value shift))))
          (offset32 0))
      (memarg_reg_plus_off (load_indexed_addr y x z shift) 0 0 flags))
19301967

19311968
;; Lower an address plus a small bias into a `MemArg`.
19321969

@@ -2817,6 +2854,20 @@
28172854
(_ Unit (emit (MInst.LoadAddr dst mem))))
28182855
dst))
28192856

2857+
;; Emit an `MInst.LoadIndexedAddr` that writes a fresh 64-bit temporary and
;; return that temporary.  Arguments are `(base index offset size)`.
(decl load_indexed_addr (Reg Reg i16 u8) Reg)
(rule (load_indexed_addr base_reg index_reg disp shift)
      (let ((result WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.LoadIndexedAddr result base_reg index_reg disp shift))))
        result))
2863+
2864+
;; Emit an `MInst.LoadLogicalIndexedAddr` that writes a fresh 64-bit temporary
;; and return that temporary.  Arguments are `(base index offset size)`.
(decl load_logical_indexed_addr (Reg Reg u16 u8) Reg)
(rule (load_logical_indexed_addr base_reg index_reg disp shift)
      (let ((result WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.LoadLogicalIndexedAddr result base_reg index_reg disp shift))))
        result))
2870+
28202871
;; Helper for emitting `MInst.Call` instructions.
28212872
(decl call_impl (WritableReg BoxCallInfo) SideEffectNoResult)
28222873
(rule (call_impl reg info)

cranelift/codegen/src/isa/s390x/inst/emit.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2321,6 +2321,30 @@ impl Inst {
23212321
rd, &mem, opcode_rx, opcode_rxy, opcode_ril, false, sink, emit_info, state,
23222322
);
23232323
}
2324+
&Inst::LoadIndexedAddr {
    rd,
    base,
    index,
    offset,
    size,
} => {
    // `size` is the left-shift amount and selects the opcode variant; only
    // 0..=4 map to an instruction of this family. Anything larger would
    // silently select an unrelated opcode, so catch it in debug builds.
    debug_assert!(size <= 4, "invalid shift amount {size} for LoadIndexedAddr");
    // NOTE(review): confirm the 0xe361 base opcode (and its parity relative to
    // the logical form) against the z/Architecture Principles of Operation.
    let opcode: u16 = 0xe361 | ((u16::from(size) & 0xf) << 1);
    // Sign-extend the 16-bit offset, then hand its bit pattern to the RXY
    // encoder as the displacement.
    let disp = offset as i32 as u32;
    put(sink, &enc_rxy(opcode, rd.to_reg(), base, index, disp));
}
2335+
&Inst::LoadLogicalIndexedAddr {
    rd,
    base,
    index,
    offset,
    size,
} => {
    // `size` is the left-shift amount and selects the opcode variant; only
    // 0..=4 map to an instruction of this family. Anything larger would
    // silently select an unrelated opcode, so catch it in debug builds.
    debug_assert!(
        size <= 4,
        "invalid shift amount {size} for LoadLogicalIndexedAddr"
    );
    // NOTE(review): confirm the 0xe360 base opcode (and its parity relative to
    // the signed form) against the z/Architecture Principles of Operation.
    let opcode: u16 = 0xe360 | ((u16::from(size) & 0xf) << 1);
    // The unsigned 16-bit offset is widened losslessly to the displacement.
    let disp = u32::from(offset);
    put(sink, &enc_rxy(opcode, rd.to_reg(), base, index, disp));
}
23242348

23252349
&Inst::Mov64 { rd, rm } => {
23262350
let opcode = 0xb904; // LGR

cranelift/codegen/src/isa/s390x/inst/mod.rs

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! This module defines s390x-specific machine instruction types.
22
33
use crate::binemit::{Addend, CodeOffset, Reloc};
4-
use crate::ir::{ExternalName, Type, types};
4+
use crate::ir::{ExternalName, MemFlags, Type, types};
55
use crate::isa::s390x::abi::S390xMachineDeps;
66
use crate::isa::{CallConv, FunctionAlignment};
77
use crate::machinst::*;
@@ -240,6 +240,10 @@ impl Inst {
240240
| Inst::Unwind { .. }
241241
| Inst::ElfTlsGetOffset { .. } => InstructionSet::Base,
242242

243+
// Both load-indexed-address forms require the MIE4 instruction set.
Inst::LoadLogicalIndexedAddr { .. } | Inst::LoadIndexedAddr { .. } => {
    InstructionSet::MIE4
}
246+
243247
// These depend on the opcode
244248
Inst::AluRRR { alu_op, .. } => match alu_op {
245249
ALUOp::NotAnd32 | ALUOp::NotAnd64 => InstructionSet::MIE3,
@@ -1030,6 +1034,20 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor<impl Ope
10301034
collector.reg_def(rd);
10311035
memarg_operands(mem, collector);
10321036
}
1037+
// Both indexed-address forms define `rd` and read `base` and `index`; the
// offset and size are immediates and contribute no register operands.
Inst::LoadIndexedAddr {
    rd, base, index, ..
}
| Inst::LoadLogicalIndexedAddr {
    rd, base, index, ..
} => {
    collector.reg_def(rd);
    collector.reg_use(base);
    collector.reg_use(index);
}
10331051
Inst::StackProbeLoop { probe_count, .. } => {
10341052
collector.reg_early_def(probe_count);
10351053
}
@@ -3507,6 +3525,58 @@ impl Inst {
35073525

35083526
format!("{mem_str}{op} {rd}, {mem}")
35093527
}
3528+
&Inst::LoadIndexedAddr {
    rd,
    base,
    index,
    offset,
    size,
} => {
    // The mnemonic suffix encodes the left-shift amount applied to the
    // index. Shift 0 is a valid form and must print rather than panic.
    let op = match size {
        0 => "lxab",
        1 => "lxah",
        2 => "lxaf",
        3 => "lxag",
        4 => "lxaq",
        _ => unreachable!("invalid shift amount {size} for LoadIndexedAddr"),
    };
    let rd = pretty_print_reg(rd.to_reg());
    // Reuse the base+index+disp memory-operand printer for the address part.
    let disp = SImm20::maybe_from_i64(i64::from(offset))
        .expect("a 16-bit offset always fits in a 20-bit displacement");
    let mem = MemArg::BXD20 {
        base,
        index,
        disp,
        flags: MemFlags::trusted(),
    }
    .pretty_print_default();
    format!("{op} {rd}, {mem}")
}
3554+
&Inst::LoadLogicalIndexedAddr {
    rd,
    base,
    index,
    offset,
    size,
} => {
    // The mnemonic suffix encodes the left-shift amount applied to the
    // index. Shift 0 is a valid form and must print rather than panic.
    let op = match size {
        0 => "llxab",
        1 => "llxah",
        2 => "llxaf",
        3 => "llxag",
        4 => "llxaq",
        _ => unreachable!("invalid shift amount {size} for LoadLogicalIndexedAddr"),
    };
    let rd = pretty_print_reg(rd.to_reg());
    // Reuse the base+index+disp memory-operand printer for the address part.
    let disp = SImm20::maybe_from_i64(i64::from(offset))
        .expect("a 16-bit offset always fits in a 20-bit displacement");
    let mem = MemArg::BXD20 {
        base,
        index,
        disp,
        flags: MemFlags::trusted(),
    }
    .pretty_print_default();
    format!("{op} {rd}, {mem}")
}
35103580
&Inst::StackProbeLoop {
35113581
probe_count,
35123582
guard_size,

cranelift/codegen/src/isa/s390x/lower.isle

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,21 @@
115115
(rule 1 (lower (has_type (vr128_ty ty) (iadd x y)))
116116
(vec_add ty x y))
117117

118+
;; Lower `base + (extend(index + imm16) << shift)` to a single (logical)
;; load-indexed-address instruction when MIE4 is available.
;;
;; In every rule `x` is the 32-bit index, `z` the 16-bit constant added to it
;; and `y` the 64-bit base.  The helpers take `(base index offset size)`, so
;; `y` must be passed first no matter which side of the outer `iadd` it is on.
;; The inner `iadd` is constrained to `$I32` because the add is folded into
;; the instruction's 32-bit index computation; a narrower add would wrap at a
;; different width.
;;
;; NOTE(review): `shift` matches any `u8` constant, but only shift amounts
;; 0..=4 have an encoding -- confirm a range guard is applied before emission.
(rule 16 (lower (has_type (and (ty_addr64 _) (mie4_enabled))
          (iadd (ishl (uextend (and (value_type $I32) (iadd x (u16_from_value z))))
                      (u8_from_value shift))
                y)))
      (load_logical_indexed_addr y x z shift))

(rule 17 (lower (has_type (and (ty_addr64 _) (mie4_enabled))
          (iadd y
                (ishl (uextend (and (value_type $I32) (iadd x (u16_from_value z))))
                      (u8_from_value shift)))))
      (load_logical_indexed_addr y x z shift))

(rule 18 (lower (has_type (and (ty_addr64 _) (mie4_enabled))
          (iadd (ishl (sextend (and (value_type $I32) (iadd x (i16_from_value z))))
                      (u8_from_value shift))
                y)))
      (load_indexed_addr y x z shift))

(rule 19 (lower (has_type (and (ty_addr64 _) (mie4_enabled))
          (iadd y
                (ishl (sextend (and (value_type $I32) (iadd x (i16_from_value z))))
                      (u8_from_value shift)))))
      (load_indexed_addr y x z shift))
118133

119134
;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
120135

cranelift/codegen/src/isa/s390x/lower/isle.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,13 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> {
478478
Some(imm)
479479
}
480480

481+
#[inline]
482+
fn u16_from_value(&mut self, val: Value) -> Option<u16> {
483+
let constant = self.u64_from_value(val)?;
484+
let imm = u16::try_from(constant).ok()?;
485+
Some(imm)
486+
}
487+
481488
#[inline]
482489
fn u8_from_value(&mut self, val: Value) -> Option<u8> {
483490
let constant = self.u64_from_value(val)?;
@@ -722,6 +729,15 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> {
722729
SImm20::maybe_from_i64(i64::from(imm))
723730
}
724731

732+
#[inline]
733+
fn memarg_imm_from_shifted_offset(&mut self, imm: Offset32, shift: u8) -> Option<SImm20> {
734+
if (1..=4).contains(&shift) && i64::from(imm) & ((1 << shift) - 1) == 0 {
735+
SImm20::maybe_from_i64(i64::from(imm) >> shift)
736+
} else {
737+
None
738+
}
739+
}
740+
725741
#[inline]
726742
fn memarg_imm_from_offset_plus_bias(&mut self, imm: Offset32, bias: u8) -> Option<SImm20> {
727743
let final_offset = i64::from(imm) + bias as i64;

cranelift/filetests/filetests/isa/s390x/arithmetic-arch15.clif

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,3 +327,48 @@ block0(v0: i128):
327327
; vst %v4, 0(%r2)
328328
; br %r14
329329

330+
; Returns v0 + (uextend.i64(v1 + 8000) << 4); the shifted operand is on the
; right-hand side of the outer iadd.
function %i64_i32_offset_mul_unsigned(i64, i32) -> i64 {
331+
block0(v0: i64, v1: i32):
332+
v2 = iconst.i8 4
333+
v3 = iconst.i32 8000
334+
v4 = iadd v1, v3
335+
v5 = uextend.i64 v4
336+
v6 = ishl v5, v2
337+
v7 = iadd v0, v6
338+
return v7
339+
}
340+
341+
; VCode:
342+
; block0:
343+
; llxaq %r2, 8000(%r3,%r2)
344+
; br %r14
345+
;
346+
; Disassembled:
347+
; block0: ; offset 0x0
348+
; .byte 0xe3, 0x23
349+
; swr %f4, %f0
350+
; .byte 0x01, 0x68
351+
; br %r14
352+
353+
; Returns v0 + (sextend.i64(v1 + 8000) << 4); the shifted operand is on the
; right-hand side of the outer iadd.  Named after its (i64, i32) signature;
; no load is performed here.
function %i64_i32_offset_mul_signed(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
    v2 = iconst.i8 4
    v3 = iconst.i32 8000
    v4 = iadd v1, v3
    v5 = sextend.i64 v4
    v6 = ishl v5, v2
    v7 = iadd v0, v6
    return v7
}
363+
364+
; VCode:
365+
; block0:
366+
; lxaq %r2, 8000(%r3,%r2)
367+
; br %r14
368+
;
369+
; Disassembled:
370+
; block0: ; offset 0x0
371+
; .byte 0xe3, 0x23
372+
; swr %f4, %f0
373+
; .byte 0x01, 0x69
374+
; br %r14
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
test compile precise-output
2+
set enable_multi_ret_implicit_sret
3+
target s390x arch15
4+
5+
; Loads a byte (zero-extended to i64) from
; v0 + (uextend.i64(v1 + 8000) << 4).
function %uload8_i64_i32_offset_mul_unsigned(i64, i32) -> i64 {
6+
block0(v0: i64, v1: i32):
7+
v2 = iconst.i8 4
8+
v3 = iconst.i32 8000
9+
v4 = iadd v1, v3
10+
v5 = uextend.i64 v4
11+
v6 = ishl v5, v2
12+
v7 = iadd v0, v6
13+
v8 = uload8.i64 v7
14+
return v8
15+
}
16+
17+
; VCode:
18+
; block0:
19+
; llxaq %r3, 8000(%r3,%r2)
20+
; llgc %r2, 0(%r3)
21+
; br %r14
22+
;
23+
; Disassembled:
24+
; block0: ; offset 0x0
25+
; .byte 0xe3, 0x33
26+
; swr %f4, %f0
27+
; .byte 0x01, 0x68
28+
; llgc %r2, 0(%r3) ; trap: heap_oob
29+
; br %r14
30+
31+
; Loads a byte (zero-extended to i64) from
; v0 + (sextend.i64(v1 + 8000) << 4).  Named after its (i64, i32) signature.
function %uload8_i64_i32_offset_mul_signed(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
    v2 = iconst.i8 4
    v3 = iconst.i32 8000
    v4 = iadd v1, v3
    v5 = sextend.i64 v4
    v6 = ishl v5, v2
    v7 = iadd v0, v6
    v8 = uload8.i64 v7
    return v8
}
42+
43+
; VCode:
44+
; block0:
45+
; lxaq %r3, 8000(%r3,%r2)
46+
; llgc %r2, 0(%r3)
47+
; br %r14
48+
;
49+
; Disassembled:
50+
; block0: ; offset 0x0
51+
; .byte 0xe3, 0x33
52+
; swr %f4, %f0
53+
; .byte 0x01, 0x69
54+
; llgc %r2, 0(%r3) ; trap: heap_oob
55+
; br %r14
56+
57+
; Loads a byte (zero-extended to i64) from v0 + (uextend.i64(v1) << 0) + 1000.
; There is no inner constant add here.  Named after its (i64, i32) signature.
function %uload8_i64_i32_offset_shifted0(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
    v2 = iconst.i8 0
    v3 = uextend.i64 v1
    v4 = ishl v3, v2
    v5 = iadd v0, v4
    v6 = uload8.i64 v5+1000
    return v6
}
66+
67+
; VCode:
68+
; block0:
69+
; llgfr %r5, %r3
70+
; sllg %r5, %r5, 0
71+
; llgc %r2, 1000(%r5,%r2)
72+
; br %r14
73+
;
74+
; Disassembled:
75+
; block0: ; offset 0x0
76+
; llgfr %r5, %r3
77+
; sllg %r5, %r5, 0
78+
; llgc %r2, 0x3e8(%r5, %r2) ; trap: heap_oob
79+
; br %r14
80+

0 commit comments

Comments
 (0)