Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ warn_redundant_casts = true
warn_unused_ignores = true

[tool.ruff]
line-length = 120
line-length = 100
indent-width = 2

[tool.ruff.format]
Expand Down
53 changes: 30 additions & 23 deletions src/mldebug/aie_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from mldebug.extra.aie_guidance import AIEGuidanceChecker


class AIEStatus:
"""
Top level class to manage aie status
Expand Down Expand Up @@ -84,7 +85,9 @@ def _append_dma_status(self, mtype, vaiml=False):
if overlay_info is None and vaiml:
continue
overlay_info = "" if not overlay_info or not vaiml else f" ({overlay_info})"
self.results[mtype][rtype].append((name + overlay_info, c, r, hex(regdata), extra_meta, parsed_reg))
self.results[mtype][rtype].append(
(name + overlay_info, c, r, hex(regdata), extra_meta, parsed_reg)
)
channel += 1

def _append_bd_status(self, mtype, registers):
Expand Down Expand Up @@ -184,24 +187,18 @@ def _read_ddr_with_devmem(self, address, width=32):
if width == 64:
# Read 64-bit by reading two 32-bit values and combining.
result_low = subprocess.run(
["devmem2", hex(address)],
capture_output=True,
text=True,
check=True
["devmem2", hex(address)], capture_output=True, text=True, check=True
)
match_low = re.search(r':\s*(0x[0-9a-fA-F]+)', result_low.stdout)
match_low = re.search(r":\s*(0x[0-9a-fA-F]+)", result_low.stdout)
if not match_low:
print(f"[WARNING] Failed to parse devmem2 output for address {hex(address)}")
return None
low_val = int(match_low.group(1), 16)

result_high = subprocess.run(
["devmem2", hex(address + 4)],
capture_output=True,
text=True,
check=True
["devmem2", hex(address + 4)], capture_output=True, text=True, check=True
)
match_high = re.search(r':\s*(0x[0-9a-fA-F]+)', result_high.stdout)
match_high = re.search(r":\s*(0x[0-9a-fA-F]+)", result_high.stdout)
if not match_high:
print(f"[WARNING] Failed to parse devmem2 output for address {hex(address + 4)}")
return None
Expand All @@ -210,12 +207,9 @@ def _read_ddr_with_devmem(self, address, width=32):
return (high_val << 32) | low_val
else:
result = subprocess.run(
["devmem2", hex(address)],
capture_output=True,
text=True,
check=True
["devmem2", hex(address)], capture_output=True, text=True, check=True
)
match = re.search(r':\s*(0x[0-9a-fA-F]+)', result.stdout)
match = re.search(r":\s*(0x[0-9a-fA-F]+)", result.stdout)
if not match:
print(f"[WARNING] Failed to parse devmem2 output for address {hex(address)}")
return None
Expand Down Expand Up @@ -259,11 +253,11 @@ def _append_hsa_queue_status(self):

# Validate address - skip DDR reads for invalid addresses
# 0x0 indicates no queue, 0xffffffffffffffff indicates uninitialized/invalid
if hsa_queue_addr != 0 and hsa_queue_addr != 0xffffffffffffffff:
if hsa_queue_addr != 0 and hsa_queue_addr != 0xFFFFFFFFFFFFFFFF:
# Read queue information from DDR
read_index = self._read_ddr_with_devmem(hsa_queue_addr + 0x0, 64)
write_index = self._read_ddr_with_devmem(hsa_queue_addr + 0x10, 64)
queue_capacity = self._read_ddr_with_devmem(hsa_queue_addr + 0xc, 32)
queue_capacity = self._read_ddr_with_devmem(hsa_queue_addr + 0xC, 32)

if read_index is not None:
hsa_info.append(("HSA_READ_INDEX", read_index))
Expand Down Expand Up @@ -316,7 +310,9 @@ def _append_core_status(self, debug_mode=False):

if debug_mode:
dbg_ctrl_1 = hex(self.backend.read_register(c, r, regmap["DEBUG_CONTROL1"]))
self.results[mtype][cs_k].append((f"DBG_CTRL:{dbg_ctrl_1}", c, r, cs_val, f"PC:{cpc_val}", cs_parsed))
self.results[mtype][cs_k].append(
(f"DBG_CTRL:{dbg_ctrl_1}", c, r, cs_val, f"PC:{cpc_val}", cs_parsed)
)
else:
cs_parsed += f",PC:{cpc_val}"
self.results[mtype][cs_k].append((cs_k, c, r, cs_val, "", cs_parsed))
Expand Down Expand Up @@ -436,7 +432,15 @@ def update(self, tile_type=None, vaiml=False, advanced=False, debug_map_json=Non
# DMA in AIE, Shim and MEM Tiles
self._append_dma_status(ttype, vaiml)

def get(self, filename=None, tile_type=None, vaiml=False, advanced=False, debug_map_json=None, guidance=False):
def get(
self,
filename=None,
tile_type=None,
vaiml=False,
advanced=False,
debug_map_json=None,
guidance=False,
):
"""
Query, store, and print or save status for all requested tiles.

Expand Down Expand Up @@ -477,7 +481,11 @@ def _get_uc_status(self, debug_map_json=None):
key = (entry.get("page_offset"), entry.get("column"))
prev_map[key] = None
if prev_entry:
prev_map[key] = prev_entry.get("operation"), prev_entry.get("line"), prev_entry.get("file")
prev_map[key] = (
prev_entry.get("operation"),
prev_entry.get("line"),
prev_entry.get("file"),
)
prev_entry = entry
for uc_data in self.results[self.aie_iface.SHIM_TILE_T]["UC_STATUS"]:
d = dict(uc_data[3])
Expand Down Expand Up @@ -514,8 +522,7 @@ def get_uc_status(self, debug_map_json=None, guidance=False):
else:
print("UC Module is not present in this device.")

def run_guidance_checks(self, show_passed=False, show_guidance=True,
export_json=None):
def run_guidance_checks(self, show_passed=False, show_guidance=True, export_json=None):
"""
Run guidance checks on collected status data and display results.

Expand Down
24 changes: 12 additions & 12 deletions src/mldebug/aie_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,12 @@ def skip_iterations_to_lock_acq(self, lock_acq_pc, count, sid):
wait_until(self.impl.poll_core_status)

pcs = self.impl.read_core_pc(True)
is_valid = self.pcs_match_target(pcs, lock_acq_pc)
is_valid = self.pcs_match_target(pcs, lock_acq_pc)
if not is_valid:
LOGGER.log(
f"{sid}: Invalid result in skip_iterations_to_lock_acq. "
f"target_pc={lock_acq_pc} pcs={pcs} "
)
#else:
f"{sid}: Invalid result in skip_iterations_to_lock_acq. target_pc={lock_acq_pc} pcs={pcs} "
)
# else:
# LOGGER.log(
# f"{sid}: Successfully skipped to lock acq pc. "
# f"target_pc={lock_acq_pc} pcs={pcs} "
Expand Down Expand Up @@ -307,7 +306,9 @@ def break_combo(self):
true_core_event = self._get_eventid("TRUE_CORE")

# eventC==eventD means generate combo3 and reset state machine
combo_event_inputs = rising_edge_event + (true_core_event << 8) + (pc_event << 16) + (pc_event << 24)
combo_event_inputs = (
rising_edge_event + (true_core_event << 8) + (pc_event << 16) + (pc_event << 24)
)
self.impl.write_aie_regs(reg_map["DEBUG_CONTROL1"], combo_3_event << 16)
self.impl.write_aie_regs(reg_map["COMBO_EVENT_INPUTS_A_D"], combo_event_inputs)

Expand Down Expand Up @@ -359,7 +360,7 @@ def check_errors(self, layer, itr):
self._error_found = True

# Check secondary error event register if it exists (NPU3 only)
if hasattr(aif, 'ERRORS_EVENT_REG2'):
if hasattr(aif, "ERRORS_EVENT_REG2"):
for c, r in self._filter_tiles(aif.AIE_TILE_T):
data = self.impl.read_register(c, r, aif.Core_registers[aif.ERRORS_EVENT_REG2])
parsed = aif.parse_register(aif.ERRORS_EVENT_REG2, data)
Expand Down Expand Up @@ -393,7 +394,6 @@ def check_errors(self, layer, itr):
)
print()


def write_aie_regs(self, offset, val):
"""
Write a value to all AIE tile registers
Expand Down Expand Up @@ -444,7 +444,7 @@ def single_step_core(self, c, r):
Single step an aie core
"""
offset = self.aie_iface.Core_registers["DEBUG_CONTROL0"]
self.impl.write_register(c, r, offset, (1<<2))
self.impl.write_register(c, r, offset, (1 << 2))

def disable_ecc_event(self):
"""
Expand Down Expand Up @@ -474,17 +474,17 @@ def pcs_match_target(self, pcs, target_pc, allow_combo_delay=False):
for tile, val in pc_dict.items():
if target_pc == val:
continue
#print(f"Try to reconcile tile {tile} {val}")
# print(f"Try to reconcile tile {tile} {val}")
col, row = tile
for _ in range(num_pipeline_stages):
self.single_step_core(col, row)
newpc = self.read_core_pc_tile(col, row)
delta = newpc - target_pc
if target_pc == newpc or max_pc_tolerance > delta > 0 :
if target_pc == newpc or max_pc_tolerance > delta > 0:
break
# if core pc is slightly ahead, we should be okay
# but if not, execution can run into trouble later
if target_pc > self.read_core_pc_tile(col, row):
return False
#print("Successfully reconciled")
# print("Successfully reconciled")
return True
2 changes: 1 addition & 1 deletion src/mldebug/arch/aie2p_defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ def parse_overlay():
overlay[t] = tile["dma_connectivity"]
except FileNotFoundError:
# Return empty overlay if not supported
#print("Overlay info not found for this Device.")
# print("Overlay info not found for this Device.")
return {}

return overlay
Expand Down
2 changes: 1 addition & 1 deletion src/mldebug/arch/aie2ps_defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,7 @@ def parse_overlay():
overlay[t] = tile["dma_connectivity"]
except FileNotFoundError:
# Return empty overlay if not supported
#print("Overlay info not found for this Device.")
# print("Overlay info not found for this Device.")
return {}

return overlay
Expand Down
1 change: 1 addition & 0 deletions src/mldebug/arch/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
AIE_DEV_TEL = "telluride"
AIE_DEV_NPU3 = "npu3"


def load_aie_arch(device):
"""
return specific aie arch module based on name
Expand Down
2 changes: 1 addition & 1 deletion src/mldebug/backend/backend_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def single_step(self, num_instr=1):
"""

@abstractmethod
def read_aie_regs(self, reg)-> list[int]:
def read_aie_regs(self, reg) -> list[int]:
"""
Reads a register in all of debug aie cores

Expand Down
Loading
Loading