# riscv_instruction_encoder.py
# Forked from vroon33/RISC-V-Processor.
import re
import argparse
# Major opcode (instruction bits 6:0) for every supported mnemonic.
OPCODES = {
    **dict.fromkeys(("add", "sub", "and", "or"), 0x33),  # R-type register ops
    "addi": 0x13,  # I-type, immediate operand
    "ld": 0x03,    # I-type, load
    "sd": 0x23,    # S-type, store
    "beq": 0x63,   # B-type, branch
}

# funct3 field (instruction bits 14:12) for every supported mnemonic.
FUNCT3 = {
    "add": 0x0,
    "sub": 0x0,
    "and": 0x7,
    "or": 0x6,
    "addi": 0x0,
    "ld": 0x3,
    "sd": 0x3,
    "beq": 0x0,
}

# funct7 field (instruction bits 31:25); only the R-type mnemonics have one.
FUNCT7 = {
    "add": 0x00,
    "sub": 0x20,  # the only entry that differs: tells sub apart from add
    "and": 0x00,
    "or": 0x00,
}
def parse_register(reg_str):
    """Translate a register operand into its index (0-31).

    Accepts the numeric names ``x0``..``x31`` (the ``x`` may be upper case)
    and the ABI aliases listed below, which are matched case-insensitively.

    Args:
        reg_str: Register token, e.g. ``"x5"``, ``"sp"`` or ``"A0"``.

    Returns:
        The register index, or ``-1`` when the token is not a known register.
    """
    # Require plain ASCII digits after the 'x'. Passing the tail straight to
    # int() would also accept signs, whitespace and underscores, so tokens
    # such as "x+5", "x-0" or "x1_0" would wrongly be treated as registers.
    numeric = re.fullmatch(r'[xX]([0-9]+)', reg_str)
    if numeric:
        reg_num = int(numeric.group(1))
        if reg_num <= 31:
            return reg_num
    # ABI register names
    reg_map = {
        "zero": 0, "ra": 1, "sp": 2, "gp": 3, "tp": 4,
        "t0": 5, "t1": 6, "t2": 7,
        "s0": 8, "fp": 8, "s1": 9,
        "a0": 10, "a1": 11, "a2": 12, "a3": 13, "a4": 14, "a5": 15, "a6": 16, "a7": 17,
        "s2": 18, "s3": 19, "s4": 20, "s5": 21, "s6": 22, "s7": 23, "s8": 24, "s9": 25, "s10": 26, "s11": 27,
        "t3": 28, "t4": 29, "t5": 30, "t6": 31
    }
    return reg_map.get(reg_str.lower(), -1)
def parse_immediate(imm_str):
    """Parse an immediate operand written in decimal, hex or binary.

    Hexadecimal literals use a ``0x``/``0X`` prefix and binary literals a
    ``0b``/``0B`` prefix; anything else is read as decimal. A leading sign is
    allowed in every base, so ``-0x10`` and ``-0b101`` are accepted as well
    as ``-16``.

    Args:
        imm_str: The immediate token.

    Returns:
        The integer value, or ``None`` when the token is not a valid literal.
    """
    # Look past an optional sign when detecting the base prefix; int() itself
    # accepts a sign in front of a prefix that matches the requested base.
    prefix = imm_str.lstrip('+-')[:2].lower()
    if prefix == '0x':
        base = 16
    elif prefix == '0b':
        base = 2
    else:
        base = 10
    try:
        return int(imm_str, base)
    except ValueError:
        return None
def parse_mem_offset(operand):
    """Parse a memory operand such as ``8(x5)`` into ``(offset, register)``.

    Args:
        operand: Text of the form ``<decimal offset>(<register>)``; the
            offset may be negative.

    Returns:
        ``(offset, reg)`` where ``reg`` is whatever ``parse_register`` returns
        for the text in parentheses (``-1`` for an unknown register), or
        ``(None, None)`` when the operand does not have the expected shape.
    """
    # fullmatch rejects trailing junk such as "8(x5)foo", which a plain
    # re.match would silently accept.
    match = re.fullmatch(r'(-?\d+)\(([a-zA-Z][0-9a-zA-Z]*)\)', operand)
    if match:
        offset = int(match.group(1))
        reg = parse_register(match.group(2))
        return offset, reg
    return None, None
def encode_r_type(instr, rd, rs1, rs2):
    """Build the 32-bit word for an R-type instruction (add, sub, and, or).

    The opcode, funct3 and funct7 values are looked up by mnemonic in the
    module tables; the three register indices fill the remaining fields.
    """
    # (value, bit position of the field's least significant bit)
    fields = (
        (OPCODES[instr], 0),
        (rd, 7),
        (FUNCT3[instr], 12),
        (rs1, 15),
        (rs2, 20),
        (FUNCT7[instr], 25),
    )
    word = 0
    for value, shift in fields:
        word |= value << shift
    return word
def encode_i_type(instr, rd, rs1, imm):
    """Build the 32-bit word for an I-type instruction (addi, ld).

    Only the low 12 bits of ``imm`` are kept, so a negative value ends up in
    its 12-bit two's-complement form in bits 31:20.
    """
    # (value, bit position of the field's least significant bit)
    fields = (
        (OPCODES[instr], 0),
        (rd, 7),
        (FUNCT3[instr], 12),
        (rs1, 15),
        (imm & 0xFFF, 20),
    )
    word = 0
    for value, shift in fields:
        word |= value << shift
    return word
def encode_s_type(instr, rs1, rs2, imm):
    """Build the 32-bit word for an S-type instruction (sd).

    The immediate is cut to 12 bits and split in two: bits 11:5 go to the top
    of the word and bits 4:0 sit where other formats place rd.
    """
    imm12 = imm & 0xFFF
    upper = (imm12 >> 5) & 0x7F  # imm[11:5]
    lower = imm12 & 0x1F         # imm[4:0]
    # (value, bit position of the field's least significant bit)
    fields = (
        (OPCODES[instr], 0),
        (lower, 7),
        (FUNCT3[instr], 12),
        (rs1, 15),
        (rs2, 20),
        (upper, 25),
    )
    word = 0
    for value, shift in fields:
        word |= value << shift
    return word
def encode_b_type(instr, rs1, rs2, offset):
    """Encode B-type instructions: beq.

    Args:
        instr: Mnemonic used to look up the opcode and funct3 values.
        rs1: Index of the first source register.
        rs2: Index of the second source register.
        offset: Branch offset. Bits 12..1 are stored; bit 0 is not encoded,
            and bits above 12 are ignored.

    Returns:
        The 32-bit instruction word as an int.
    """
    opcode = OPCODES[instr]
    funct3 = FUNCT3[instr]
    # The offset bits are scattered across the word:
    #   imm[12] -> bit 31, imm[10:5] -> bits 30:25,
    #   imm[4:1] -> bits 11:8, imm[11] -> bit 7.
    # Python's >> is arithmetic, so negative offsets yield their
    # two's-complement bits here.
    imm_12 = (offset >> 12) & 0x1    # bit 12
    imm_11 = (offset >> 11) & 0x1    # bit 11
    imm_10_5 = (offset >> 5) & 0x3F  # bits 10-5
    imm_4_1 = (offset >> 1) & 0xF    # bits 4-1
    encoded = (imm_12 << 31) | (imm_10_5 << 25) | (rs2 << 20) | (rs1 << 15) | \
        (funct3 << 12) | (imm_4_1 << 8) | (imm_11 << 7) | opcode
    return encoded
def parse_instruction(line):
    """Split one line of assembly into a mnemonic and its operand list.

    Everything from the first ``#`` onward is treated as a comment and
    dropped. The mnemonic is always returned in lower case; operands are
    split on commas and stripped of surrounding whitespace.

    Args:
        line: One line of assembly source.

    Returns:
        ``None`` for a blank or comment-only line, otherwise a tuple
        ``(mnemonic, operands)`` where ``operands`` is a list of strings
        (empty when the line holds only a mnemonic).
    """
    # Remove comments and trim whitespace
    line = re.sub(r'#.*$', '', line).strip()
    if not line:
        return None
    # Split into instruction and operands
    parts = re.split(r'\s+', line, maxsplit=1)
    # Lower-case before the early return so an operand-less line is
    # normalised the same way as every other line.
    instr = parts[0].lower()
    if len(parts) < 2:
        return instr, []
    # Split operands and remove whitespace
    operands = [op.strip() for op in parts[1].split(',')]
    return instr, operands
def _fits_signed(value, bits):
    """Return True when value is representable in `bits`-bit two's complement."""
    limit = 1 << (bits - 1)
    return -limit <= value < limit


def encode_instruction(instr, operands):
    """Encode a RISC-V instruction based on its mnemonic and operands.

    Args:
        instr: Lower-case mnemonic (a key of ``OPCODES``).
        operands: List of operand strings in assembly order.

    Returns:
        ``(encoded, None)`` on success, where ``encoded`` is the 32-bit
        instruction word, or ``(None, message)`` when the mnemonic is
        unsupported, the operand count is wrong, an operand cannot be parsed,
        or an immediate/offset does not fit its field.
    """
    if instr not in OPCODES:
        return None, f"Unsupported instruction: {instr}"
    # The instruction as written, used in the error messages.
    shown = f"{instr} {', '.join(operands)}"

    # R-type: add rd, rs1, rs2
    if instr in ["add", "sub", "and", "or"]:
        if len(operands) != 3:
            return None, f"{instr} requires 3 operands: rd, rs1, rs2"
        rd = parse_register(operands[0])
        rs1 = parse_register(operands[1])
        rs2 = parse_register(operands[2])
        if -1 in [rd, rs1, rs2]:
            return None, f"Invalid register in: {shown}"
        return encode_r_type(instr, rd, rs1, rs2), None

    # I-type (immediate): addi rd, rs1, imm
    elif instr == "addi":
        if len(operands) != 3:
            return None, f"{instr} requires 3 operands: rd, rs1, imm"
        rd = parse_register(operands[0])
        rs1 = parse_register(operands[1])
        imm = parse_immediate(operands[2])
        if -1 in [rd, rs1] or imm is None:
            return None, f"Invalid operand in: {shown}"
        # The encoder keeps only 12 bits; reject anything it would truncate.
        if not _fits_signed(imm, 12):
            return None, f"Immediate out of range (-2048 to 2047) in: {shown}"
        return encode_i_type(instr, rd, rs1, imm), None

    # I-type (load): ld rd, offset(rs1)
    elif instr == "ld":
        if len(operands) != 2:
            return None, f"{instr} requires 2 operands: rd, offset(rs1)"
        rd = parse_register(operands[0])
        offset, rs1 = parse_mem_offset(operands[1])
        if -1 in [rd, rs1] or offset is None:
            return None, f"Invalid operand in: {shown}"
        if not _fits_signed(offset, 12):
            return None, f"Offset out of range (-2048 to 2047) in: {shown}"
        return encode_i_type(instr, rd, rs1, offset), None

    # S-type: sd rs2, offset(rs1)
    elif instr == "sd":
        if len(operands) != 2:
            return None, f"{instr} requires 2 operands: rs2, offset(rs1)"
        rs2 = parse_register(operands[0])
        offset, rs1 = parse_mem_offset(operands[1])
        if -1 in [rs1, rs2] or offset is None:
            return None, f"Invalid operand in: {shown}"
        if not _fits_signed(offset, 12):
            return None, f"Offset out of range (-2048 to 2047) in: {shown}"
        return encode_s_type(instr, rs1, rs2, offset), None

    # B-type: beq rs1, rs2, offset
    elif instr == "beq":
        if len(operands) != 3:
            return None, f"{instr} requires 3 operands: rs1, rs2, offset"
        rs1 = parse_register(operands[0])
        rs2 = parse_register(operands[1])
        offset = parse_immediate(operands[2])
        if -1 in [rs1, rs2] or offset is None:
            return None, f"Invalid operand in: {shown}"
        # The encoder stores offset bits 12..1 only: an odd offset would lose
        # its low bit and a larger one its high bits, both silently.
        if offset % 2 != 0 or not _fits_signed(offset, 13):
            return None, f"Branch offset must be even and within -4096 to 4094 in: {shown}"
        return encode_b_type(instr, rs1, rs2, offset), None

    return None, f"Instruction encoding not implemented: {instr}"
def write_executable_format(encoded_instructions, output_file):
    """
    Dump instruction words to a text file, one byte per line.

    Every instruction produces four lines of two lowercase hex digits each,
    ordered from the most significant byte (bits 31:24) down to the least.
    """
    with open(output_file, 'w') as out:
        for word in encoded_instructions:
            # Keep the low 32 bits, then lay them out big-endian.
            raw = (word & 0xFFFFFFFF).to_bytes(4, 'big')
            out.writelines(f"{octet:02x}\n" for octet in raw)
def main(argv=None):
    """Command-line entry point: assemble a file and write both outputs.

    Reads the input file line by line, encodes every instruction, and writes
    two files: an annotated listing (source line plus hex word, or an error
    message for lines that failed) and the byte-per-line executable dump,
    which contains only the instructions that encoded successfully.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv``, so existing callers are
            unaffected.
    """
    import sys  # local import: only needed for the stderr summary below

    parser = argparse.ArgumentParser(description='Encode RISC-V instructions to hex')
    parser.add_argument('input_file', help='File with RISC-V assembly instructions')
    parser.add_argument('-o', '--output', default='hex_instructions.s',
                        help='Output file (default: hex_instructions.s)')
    parser.add_argument('-e', '--executable', default='executable.s',
                        help='Executable output file (default: executable.s)')
    args = parser.parse_args(argv)

    results = []
    encoded_instructions = []
    error_count = 0
    with open(args.input_file, 'r') as f:
        for i, line in enumerate(f, 1):
            parsed = parse_instruction(line)
            if parsed is None:
                # Blank or comment-only line.
                continue
            instr, operands = parsed
            encoded, error = encode_instruction(instr, operands)
            if error:
                error_count += 1
                results.append(f"Line {i}: {error}")
            else:
                results.append(f"{line.strip():40} # 0x{encoded:08x}")
                encoded_instructions.append(encoded)

    output = '\n'.join(results)
    # Write the annotated output
    with open(args.output, 'w') as f:
        f.write(output)
    print(f"Output written to {args.output}")
    # Write the executable format
    write_executable_format(encoded_instructions, args.executable)
    print(f"Executable format written to {args.executable}")
    # Failed lines are left out of the executable; say so instead of leaving
    # the only trace inside the annotated file.
    if error_count:
        print(f"Warning: {error_count} line(s) could not be encoded; "
              f"see {args.output} for details", file=sys.stderr)
# Run the command-line encoder only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()