Skip to content

Commit c9af4e1

Browse files
AJMansfield authored and dpgeorge committed
tools/mpy-tool.py: Add Compiler Explorer JSON output.
This commit adds a `--json` option to `mpy-tool.py`, in order to generate Compiler-Explorer-compatible JSON annotation information for the bytecode disassembly. Some of this information might be theoretically possible to parse out from the text itself, but not all of it is, e.g. disambiguating child references with non-unique simple names. Signed-off-by: Anson Mansfield <amansfield@mantaro.com>
1 parent db8273d commit c9af4e1

File tree

1 file changed

+280
-16
lines changed

1 file changed

+280
-16
lines changed

tools/mpy-tool.py

Lines changed: 280 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
2525
# THE SOFTWARE.
2626

27+
import io
2728
import struct
2829
import sys
2930
from binascii import hexlify
@@ -302,6 +303,25 @@ class Opcode:
302303
MP_BC_POP_JUMP_IF_TRUE,
303304
MP_BC_POP_JUMP_IF_FALSE,
304305
)
306+
ALL_OFFSET = (
307+
MP_BC_UNWIND_JUMP,
308+
MP_BC_JUMP,
309+
MP_BC_POP_JUMP_IF_TRUE,
310+
MP_BC_POP_JUMP_IF_FALSE,
311+
MP_BC_JUMP_IF_TRUE_OR_POP,
312+
MP_BC_JUMP_IF_FALSE_OR_POP,
313+
MP_BC_SETUP_WITH,
314+
MP_BC_SETUP_EXCEPT,
315+
MP_BC_SETUP_FINALLY,
316+
MP_BC_POP_EXCEPT_JUMP,
317+
MP_BC_FOR_ITER,
318+
)
319+
ALL_WITH_CHILD = (
320+
MP_BC_MAKE_FUNCTION,
321+
MP_BC_MAKE_FUNCTION_DEFARGS,
322+
MP_BC_MAKE_CLOSURE,
323+
MP_BC_MAKE_CLOSURE_DEFARGS,
324+
)
305325

306326
# Create a dict mapping opcode value to opcode name.
307327
mapping = ["unknown" for _ in range(256)]
@@ -896,7 +916,7 @@ def __init__(self, parent_name, qstr_table, fun_data, prelude_offset, code_kind)
896916
self.escaped_name = unique_escaped_name
897917

898918
def disassemble_children(self):
899-
print(" children:", [rc.simple_name.str for rc in self.children])
919+
self.print_children_annotated()
900920
for rc in self.children:
901921
rc.disassemble()
902922

@@ -985,6 +1005,75 @@ def freeze_raw_code(self, prelude_ptr=None, type_sig=0):
9851005
raw_code_count += 1
9861006
raw_code_content += 4 * 4
9871007

1008+
@staticmethod
1009+
def decode_lineinfo(line_info: memoryview) -> "tuple[int, int, memoryview]":
1010+
c = line_info[0]
1011+
if (c & 0x80) == 0:
1012+
# 0b0LLBBBBB encoding
1013+
return (c & 0x1F), (c >> 5), line_info[1:]
1014+
else:
1015+
# 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte)
1016+
return (c & 0xF), (((c << 4) & 0x700) | line_info[1]), line_info[2:]
1017+
1018+
def get_source_annotation(self, ip: int, file=None) -> dict:
    """Build the ``source`` annotation for the instruction at position ``ip``.

    ``ip`` is measured in the same units as ``self.offset_opcodes``; the
    difference between the two is the bytecode offset that gets looked up in
    the line-number table stored in
    ``self.fun_data[self.offset_line_info:self.offset_opcodes]``.

    ``file`` is passed through unchanged into the result.

    Returns ``{"file": file, "line": N}``.  ``N`` is ``None`` when this object
    has no line table (any of the attributes used to locate it is missing);
    otherwise it is the line number reached by walking the table, starting
    from line 1.
    """
    try:
        # Every attribute needed to locate the line table is read inside the
        # guarded region, so an object lacking any of them takes the
        # "no line information" fallback rather than raising.
        bc_offset = ip - self.offset_opcodes
        line_info = memoryview(self.fun_data)[self.offset_line_info : self.offset_opcodes]
    except AttributeError:
        return {"file": file, "line": None}

    source_line = 1
    # An empty memoryview is falsy, so this stops once the table is consumed.
    while line_info:
        bc_increment, line_increment, line_info = self.decode_lineinfo(line_info)
        if bc_offset >= bc_increment:
            bc_offset -= bc_increment
            source_line += line_increment
        else:
            # The remaining offset falls inside this entry's bytecode range.
            break

    return {"file": file, "line": source_line}
1035+
1036+
def get_label(self, ip: "int | None" = None, child_num: "int | None" = None) -> str:
    """Return a label name derived from ``self.escaped_name``.

    With no arguments the label is the escaped name itself.  Passing ``ip``
    yields ``<name>.<ip>``; passing ``child_num`` yields
    ``<name>.child<child_num>``.  ``ip`` and ``child_num`` are not meant to be
    combined (asserted).
    """
    if ip is None:
        if child_num is None:
            return "%s" % self.escaped_name
        return "%s.child%d" % (self.escaped_name, child_num)

    assert child_num is None
    return "%s.%d" % (self.escaped_name, ip)
1044+
1045+
def print_children_annotated(self) -> None:
    """
    Print the ` children: [...]` line listing each child's simple name, and
    attach json markers for the start and end of each one's name in that line.

    The names are written bare (comma separated, no quotes).  The line itself
    is labelled `<escaped_name>.children` plus one `<escaped_name>.childN`
    label per child, and each child name gets a link entry pointing at that
    child's own label, with 1-based start/end columns.

    The `annotations` and `labels` keyword arguments are handed to `print`,
    so `print` has to be something that accepts them.
    """
    opening = " children: ["
    separator = ", "

    pieces = [opening]
    column = len(opening)  # number of characters emitted so far
    line_labels = ["%s.children" % self.escaped_name]
    links = []

    for index, child in enumerate(self.children):
        if index:
            pieces.append(separator)
            column += len(separator)

        name = child.simple_name.str
        first_col = column + 1
        pieces.append(name)
        column += len(name)

        child_label = self.get_label(child_num=index)
        line_labels.append(child_label)
        links.append(
            {
                "name": child_label,
                "target": child.get_label(),
                "range": {
                    "startCol": first_col,
                    "endCol": column + 1,
                },
            },
        )

    pieces.append("]")

    print("".join(pieces), annotations={"labels": links}, labels=line_labels)
1076+
9881077

9891078
class RawCodeBytecode(RawCode):
9901079
def __init__(self, parent_name, qstr_table, obj_table, fun_data):
@@ -993,9 +1082,58 @@ def __init__(self, parent_name, qstr_table, obj_table, fun_data):
9931082
parent_name, qstr_table, fun_data, 0, MP_CODE_BYTECODE
9941083
)
9951084

1085+
def get_opcode_annotations_labels(
    self, opcode: int, ip: int, arg: int, sz: int, arg_pos: int, arg_len: int
) -> "tuple[dict, list[str]]":
    """Build the annotation dict and label list for one disassembled line.

    opcode:  opcode value, used to index `Opcode.mapping` and to pick the
             kind of link (if any) to emit
    ip:      position of the instruction; also names this line's label
    arg:     decoded argument (a jump offset or a child index for the
             opcodes that get a link)
    sz:      size of the instruction as reported by the caller
    arg_pos: 0-based column where the argument text starts in the line
    arg_len: length of the argument text

    Returns `(annotations, labels)`.  `annotations` always has "source" and
    "disassembly"; opcodes in `Opcode.ALL_OFFSET` additionally get "link" and
    "labels" entries pointing at the jump destination, and opcodes in
    `Opcode.ALL_WITH_CHILD` get a "labels" entry pointing at the child.
    """
    annotations = {
        "source": self.get_source_annotation(ip),
        "disassembly": Opcode.mapping[opcode],
    }
    own_label = self.get_label(ip)
    labels = [own_label]

    if opcode in Opcode.ALL_OFFSET:
        # NOTE(review): assumes the destination is always `ip + arg + sz`;
        # confirm this holds for opcodes whose `sz` includes bytes after the
        # offset operand.
        destination = ip + arg + sz
        annotations["link"] = {
            "offset": arg_pos,
            "length": arg_len,
            "to": destination,
        }
        target = self.get_label(destination)
    elif opcode in Opcode.ALL_WITH_CHILD:
        try:
            child = self.children[arg]
        except IndexError:
            # link out-of-range child to the child array itself
            target = "%s.children" % self.escaped_name
        else:
            # link resolvable child to the actual child
            target = child.get_label()
    else:
        # No argument worth linking for this opcode.
        return annotations, labels

    annotations["labels"] = [
        {
            "name": own_label,
            "target": target,
            "range": {
                "startCol": arg_pos + 1,
                "endCol": arg_pos + arg_len + 1,
            },
        },
    ]

    return annotations, labels
1133+
9961134
def disassemble(self):
9971135
bc = self.fun_data
998-
print("simple_name:", self.simple_name.str)
1136+
print("simple_name:", self.simple_name.str, labels=[self.get_label()])
9991137
print(" raw bytecode:", len(bc), hexlify_to_str(bc))
10001138
print(" prelude:", self.prelude_signature)
10011139
print(" args:", [self.qstr_table[i].str for i in self.names[1:]])
@@ -1011,9 +1149,22 @@ def disassemble(self):
10111149
pass
10121150
else:
10131151
arg = ""
1014-
print(
1015-
" %-11s %s %s" % (hexlify_to_str(bc[ip : ip + sz]), Opcode.mapping[bc[ip]], arg)
1152+
1153+
pre_arg_part = " %-11s %s" % (
1154+
hexlify_to_str(bc[ip : ip + sz]),
1155+
Opcode.mapping[bc[ip]],
1156+
)
1157+
arg_part = "%s" % arg
1158+
annotations, labels = self.get_opcode_annotations_labels(
1159+
opcode=bc[ip],
1160+
ip=ip,
1161+
arg=arg,
1162+
sz=sz,
1163+
arg_pos=len(pre_arg_part) + 1,
1164+
arg_len=len(arg_part),
10161165
)
1166+
1167+
print(pre_arg_part, arg_part, annotations=annotations, labels=labels)
10171168
ip += sz
10181169
self.disassemble_children()
10191170

@@ -1114,7 +1265,7 @@ def __init__(
11141265

11151266
def disassemble(self):
11161267
fun_data = self.fun_data
1117-
print("simple_name:", self.simple_name.str)
1268+
print("simple_name:", self.simple_name.str, labels=[self.get_label()])
11181269
print(
11191270
" raw data:",
11201271
len(fun_data),
@@ -1833,6 +1984,100 @@ def extract_segments(compiled_modules, basename, kinds_arg):
18331984
output.write(source.read(segment.end - segment.start))
18341985

18351986

1987+
class PrintShim:
    """Base class for interposing extra functionality onto the global `print` method.

    Used as a context manager: on entry the module-global `print` is replaced
    by this object (which is callable), and on exit the previous `print` is
    put back and `on_exit()` is invoked.  Subclasses override `__call__` to
    change what a print does and `on_exit()` to finalise their output.
    """

    def __init__(self):
        # The `print` that was in effect before this shim was installed;
        # None whenever the shim is not active.
        self.wrapped_print = None

    def __enter__(self):
        global print

        if self.wrapped_print is not None:
            # Already installed: nesting the same shim would wrap itself.
            raise RecursionError

        self.wrapped_print = print
        print = self

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        global print

        if self.wrapped_print is None:
            return

        print = self.wrapped_print

        # Keep `wrapped_print` available while `on_exit()` runs, so a hook that
        # forwards output through the shim (e.g. to flush a pending line) does
        # not end up calling None.  It is cleared afterwards even if the hook
        # raises, so the shim can be entered again.
        try:
            self.on_exit()
        finally:
            self.wrapped_print = None

    def on_exit(self):
        """Hook run once when the shim is removed; does nothing by default."""
        pass

    def __call__(self, *a, **k):
        """Forward the call unchanged to the wrapped `print`."""
        return self.wrapped_print(*a, **k)
2020+
2021+
2022+
class PrintIgnoreExtraArgs(PrintShim):
    """Just strip the `annotations` and `labels` kwargs and pass down to the underlying print."""

    def __call__(self, *args, annotations: dict = {}, labels: "list[str]" = (), **kwargs):
        # `annotations` and `labels` are captured by name above and simply
        # dropped, so only the remaining arguments are forwarded.
        return super(PrintIgnoreExtraArgs, self).__call__(*args, **kwargs)
2027+
2028+
2029+
class PrintJson(PrintShim):
    """Output lines as godbolt-compatible JSON with extra annotation info from `annotations` and `labels`, rather than plain text.

    Each completed printed line becomes one entry in `asm["asm"]`, holding
    the line's text plus whatever was passed as `annotations`.  Every name in
    `labels` is recorded in `asm["labelDefinitions"]` against the 1-based
    number of that line.  The collected document is written to `fp` as JSON
    when the shim exits.
    """

    def __init__(self, fp=sys.stdout, language_id: str = "mpy"):
        super().__init__()
        self.fp = fp
        self.asm = {
            "asm": [],
            "labelDefinitions": {},
            "languageId": language_id,
        }
        # Number of completed lines recorded so far.
        self.line_number: int = 0
        # Text of the line currently being assembled; None between lines.
        self.buf: "io.StringIO | None" = None

    def on_exit(self):
        """Record any unfinished line, then dump the collected JSON to `fp`."""
        import json

        if self.buf is not None:
            # flush last partial line
            # This is done directly rather than via `self.__call__()`: the
            # latter forwards to the wrapped print, which may no longer be
            # available by the time this hook runs.
            self._finish_line({}, ())

        json.dump(self.asm, self.fp)

    def _finish_line(self, annotations: dict, labels: "list[str]") -> None:
        """Turn the buffered text into one `asm` entry and record its labels."""
        output = self.buf.getvalue()
        self.buf = None

        asm_line = {"text": output}
        asm_line.update(annotations)
        self.asm["asm"].append(asm_line)

        self.line_number += 1
        for label in labels:
            self.asm["labelDefinitions"][label] = self.line_number

    def __call__(self, *a, annotations: dict = {}, labels: "list[str]" = (), **k):
        """Capture one print call.

        Calls with an explicit `file=` are forwarded untouched.  All other
        calls are echoed to `sys.stderr` and captured: a call that passes
        `end=` is treated as part of a line still being built (its
        `annotations` and `labels` are not recorded), while a call without
        `end=` completes the current line.
        """
        # ignore prints directed to an explicit output
        if "file" in k:
            return super().__call__(*a, **k)

        if self.buf is None:
            self.buf = io.StringIO()

        super().__call__(*a, file=sys.stderr, **k)

        if "end" in k:
            # buffer partial-line prints to collect into a single AsmResultLine
            return super().__call__(*a, file=self.buf, **k)

        # The line terminator is suppressed so that "text" holds the bare line.
        retval = super().__call__(*a, file=self.buf, end="", **k)
        self._finish_line(annotations, labels)
        return retval
2079+
2080+
18362081
def main(args=None):
18372082
global global_qstrs
18382083

@@ -1846,6 +2091,12 @@ def main(args=None):
18462091
"-d", "--disassemble", action="store_true", help="output disassembled contents of files"
18472092
)
18482093
cmd_parser.add_argument("-f", "--freeze", action="store_true", help="freeze files")
2094+
cmd_parser.add_argument(
2095+
"-j",
2096+
"--json",
2097+
action="store_true",
2098+
help="output hexdump, disassembly, and frozen code as JSON with extra metadata",
2099+
)
18492100
cmd_parser.add_argument(
18502101
"--merge", action="store_true", help="merge multiple .mpy files into one"
18512102
)
@@ -1913,20 +2164,33 @@ def main(args=None):
19132164
print(er, file=sys.stderr)
19142165
sys.exit(1)
19152166

1916-
if args.hexdump:
1917-
hexdump_mpy(compiled_modules)
2167+
if args.json:
2168+
if args.freeze:
2169+
print_shim = PrintJson(sys.stdout, language_id="c")
2170+
elif args.hexdump:
2171+
print_shim = PrintJson(sys.stdout, language_id="stderr")
2172+
elif args.disassemble:
2173+
print_shim = PrintJson(sys.stdout, language_id="mpy")
2174+
else:
2175+
print_shim = PrintJson(sys.stdout)
2176+
else:
2177+
print_shim = PrintIgnoreExtraArgs()
19182178

1919-
if args.disassemble:
2179+
with print_shim:
19202180
if args.hexdump:
1921-
print()
1922-
disassemble_mpy(compiled_modules)
2181+
hexdump_mpy(compiled_modules)
19232182

1924-
if args.freeze:
1925-
try:
1926-
freeze_mpy(firmware_qstr_idents, compiled_modules)
1927-
except FreezeError as er:
1928-
print(er, file=sys.stderr)
1929-
sys.exit(1)
2183+
if args.disassemble:
2184+
if args.hexdump:
2185+
print()
2186+
disassemble_mpy(compiled_modules)
2187+
2188+
if args.freeze:
2189+
try:
2190+
freeze_mpy(firmware_qstr_idents, compiled_modules)
2191+
except FreezeError as er:
2192+
print(er, file=sys.stderr)
2193+
sys.exit(1)
19302194

19312195
if args.merge:
19322196
merge_mpy(compiled_modules, args.output)

0 commit comments

Comments
 (0)