From 411b0f347950f4248a5ab1c42c7699de9bd3819a Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Thu, 28 May 2026 14:59:01 +0200 Subject: [PATCH 1/3] iOS device startup: upload .logarchive and pull crash reports on cleanup failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When post-iteration app cleanup fails (devicectl 'No such process'), the app already exited before we could terminate it. To diagnose why — crash, iOS- initiated termination, or natural exit: 1. Zip and upload the iOS device's .logarchive to the Helix results container 2. Pull device-side crash reports (.ips) via xcrun devicectl Investigative-only: triggers only on the cleanup failure that already terminates the iteration. Cherry-picked from matouskozak/ios-sdk-jobs-macos-26 (71bfd4a1, 3101d30c). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/scenarios/shared/runner.py | 38 ++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/src/scenarios/shared/runner.py b/src/scenarios/shared/runner.py index eb0cea96a7c..26f1dad5fe5 100644 --- a/src/scenarios/shared/runner.py +++ b/src/scenarios/shared/runner.py @@ -14,7 +14,7 @@ from logging import getLogger from argparse import ArgumentParser from argparse import RawTextHelpFormatter -from shutil import rmtree +from shutil import make_archive, rmtree from typing import Optional from shared.androidhelper import AndroidHelper from shared.androidinstrumentation import AndroidInstrumentationHelper @@ -843,7 +843,41 @@ def run(self): '--devname', deviceUDID ] killCmdCommand = RunCommand(killCmd, verbose=True) - killCmdCommand.run() + try: + killCmdCommand.run() + except CalledProcessError as ex: + # The kill is cleanup-only; the measurement data is already in the .logarchive above. + # devicectl returns non-zero when the app process is already gone (e.g. iOS terminated + # it, the app crashed, or it self-exited). Upload the .logarchive AND any device-side + # crash reports for this bundle to the Helix results container so we can diagnose + # why the app was already gone before re-raising. + getLogger().warning(f"App kill failed (app may have already exited): {ex}") + upload_root = os.environ.get('HELIX_WORKITEM_UPLOAD_ROOT') + if upload_root: + if os.path.exists(logarchive_filename): + archive_base = os.path.join(upload_root, f'iteration{i}.logarchive') + try: + getLogger().info(f"Saving {logarchive_filename} to {archive_base}.zip for diagnosis.") + make_archive(archive_base, 'zip', root_dir=logarchive_filename) + except Exception as upload_ex: + getLogger().warning(f"Failed to save logarchive for diagnosis: {upload_ex}") + # Pull any iOS crash reports (.ips) for our bundle from the device into the upload root. + # The systemCrashLogs domain on devicectl exposes /var/mobile/Library/Logs/CrashReporter/. + crash_dest = os.path.join(upload_root, f'iteration{i}_crashlogs') + os.makedirs(crash_dest, exist_ok=True) + crashCopyCmd = [ + 'xcrun', 'devicectl', 'device', 'copy', 'from', + '--device', deviceUDID, + '--domain-type', 'systemCrashLogs', + '--source', '/', + '--destination', crash_dest, + ] + try: + getLogger().info(f"Copying device crash logs to {crash_dest} for diagnosis.") + RunCommand(crashCopyCmd, verbose=True).run() + except Exception as crash_ex: + getLogger().warning(f"Failed to copy device crash logs for diagnosis: {crash_ex}") + raise # Process Data From 50e6dfe584a64d7107e76b0e9598752dd3bf27ee Mon Sep 17 00:00:00 2001 From: matouskozak Date: Fri, 29 May 2026 10:59:19 +0200 Subject: [PATCH 2/3] Address review: scope makedirs and filter crash logs - Wrap os.makedirs into the same try/except as the crash copy so a diagnostic-path failure cannot mask the original CalledProcessError from the failed app kill. - Prune the systemCrashLogs copy after devicectl to entries relevant to this iteration: keep files matching the bundle name or with mtime >= the iteration's log-collect start. Drops dozens of unrelated .ips files (WiFiLQMMetrics, wifip2pd, old crashes, etc.) per upload. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/scenarios/shared/runner.py | 41 +++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/src/scenarios/shared/runner.py b/src/scenarios/shared/runner.py index 26f1dad5fe5..7d0a545f887 100644 --- a/src/scenarios/shared/runner.py +++ b/src/scenarios/shared/runner.py @@ -861,20 +861,39 @@ def run(self): make_archive(archive_base, 'zip', root_dir=logarchive_filename) except Exception as upload_ex: getLogger().warning(f"Failed to save logarchive for diagnosis: {upload_ex}") - # Pull any iOS crash reports (.ips) for our bundle from the device into the upload root. - # The systemCrashLogs domain on devicectl exposes /var/mobile/Library/Logs/CrashReporter/. - crash_dest = os.path.join(upload_root, f'iteration{i}_crashlogs') - os.makedirs(crash_dest, exist_ok=True) - crashCopyCmd = [ - 'xcrun', 'devicectl', 'device', 'copy', 'from', - '--device', deviceUDID, - '--domain-type', 'systemCrashLogs', - '--source', '/', - '--destination', crash_dest, - ] + # Pull iOS crash reports (.ips) from the device into the upload root, then prune + # to entries relevant to this iteration (matching bundle name OR generated since + # the iteration started). The systemCrashLogs domain exposes /var/mobile/Library/ + # Logs/CrashReporter/, which on shared devices accumulates unrelated reports. try: + crash_dest = os.path.join(upload_root, f'iteration{i}_crashlogs') + os.makedirs(crash_dest, exist_ok=True) + crashCopyCmd = [ + 'xcrun', 'devicectl', 'device', 'copy', 'from', + '--device', deviceUDID, + '--domain-type', 'systemCrashLogs', + '--source', '/', + '--destination', crash_dest, + ] getLogger().info(f"Copying device crash logs to {crash_dest} for diagnosis.") RunCommand(crashCopyCmd, verbose=True).run() + bundle_name = os.path.splitext(os.path.basename(os.path.normpath(self.packagepath)))[0] + iteration_start_ts = runCmdTimestamp.timestamp() + kept = removed = 0 + for root, _, files in os.walk(crash_dest): + for fname in files: + fpath = os.path.join(root, fname) + try: + is_bundle_match = bool(bundle_name) and bundle_name in fname + is_recent = os.path.getmtime(fpath) >= iteration_start_ts + if is_bundle_match or is_recent: + kept += 1 + else: + os.remove(fpath) + removed += 1 + except OSError: + pass + getLogger().info(f"Kept {kept} crash log(s) relevant to {bundle_name!r} or this iteration; pruned {removed}.") except Exception as crash_ex: getLogger().warning(f"Failed to copy device crash logs for diagnosis: {crash_ex}") raise From bad02e66f9804fd2b4be450539b573763d48cbca Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Mon, 8 Jun 2026 18:00:44 +0100 Subject: [PATCH 3/3] Address review: use mlaunch crash snapshot diff for iOS device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace devicectl-based copy-everything-then-filter approach with XHarness's CrashSnapshotReporter pattern, ported to Python: * Before the iteration loop, run 'mlaunch --list-crash-reports' to capture the device's existing crash report set. * In the kill-failure handler, re-list and download only the diff via 'mlaunch --download-crash-report --download-crash-report-to'. * Poll the final snapshot for up to 60s so iOS has time to finish writing the crash report after the process dies (matches CrashSnapshotReporter.EndCaptureAsync). The previous bundle/mtime filter still uploaded the historical backlog of unrelated crashes that accumulate on shared Mac.iPhone.17.Perf devices (8-20 stale .ips per work item seen in build 2986965). The snapshot diff scopes uploads to only the crashes generated during this test run. It's a list+copy, not a move — device state is unchanged, matching XHarness behaviour. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/scenarios/shared/runner.py | 108 ++++++++++++++++++++++----------- 1 file changed, 74 insertions(+), 34 deletions(-) diff --git a/src/scenarios/shared/runner.py b/src/scenarios/shared/runner.py index 7d0a545f887..3f8ca060f8f 100644 --- a/src/scenarios/shared/runner.py +++ b/src/scenarios/shared/runner.py @@ -6,6 +6,7 @@ import os import glob import re +import tempfile import time import json @@ -767,6 +768,43 @@ def run(self): RunCommand(installCmd, verbose=True).run() getLogger().info("Completed install.") + # The Mac.iPhone.17.Perf devices in the Helix pool are shared across many test + # runs over days, and iOS retains crash reports in /var/mobile/Library/Logs/ + # CrashReporter/ until rotated out. To avoid re-uploading that historical + # backlog on a kill failure, snapshot the device's current crash report set + # now and below upload only reports that appeared since this snapshot. + # This mirrors XHarness's CrashSnapshotReporter pattern (it's a copy, not a + # move — device state is unchanged). + def listDeviceCrashReports(): + """Return the set of crash report identifiers currently on the device, + or None if listing failed.""" + listFilePath = None + try: + with tempfile.NamedTemporaryFile(mode='w', suffix='.list', delete=False) as listFile: + listFilePath = listFile.name + listCmd = xharnesscommand() + [ + 'apple', 'mlaunch', '--', + f'--list-crash-reports={listFilePath}', + '--devname', deviceUDID, + ] + RunCommand(listCmd, verbose=True).run() + with open(listFilePath) as f: + return {line.strip() for line in f if line.strip()} + except Exception as listEx: + getLogger().warning(f"Failed to list device crash reports: {listEx}") + return None + finally: + if listFilePath and os.path.exists(listFilePath): + try: + os.remove(listFilePath) + except OSError: + pass + + getLogger().info("Snapshotting existing crash reports on device.") + initial_device_crashes = listDeviceCrashReports() + if initial_device_crashes is not None: + getLogger().info(f"Found {len(initial_device_crashes)} pre-existing crash report(s).") + allResults = [] timeToFirstDrawEventEndDateTime = datetime.now() + timedelta(minutes=-10) # This is used to keep track of the latest time to draw end event, we use this to calculate time to draw and also as a reference point for the next iteration log time. for i in range(self.startupiterations + 1): # adding one iteration to account for the warmup iteration @@ -861,41 +899,43 @@ def run(self): make_archive(archive_base, 'zip', root_dir=logarchive_filename) except Exception as upload_ex: getLogger().warning(f"Failed to save logarchive for diagnosis: {upload_ex}") - # Pull iOS crash reports (.ips) from the device into the upload root, then prune - # to entries relevant to this iteration (matching bundle name OR generated since - # the iteration started). The systemCrashLogs domain exposes /var/mobile/Library/ - # Logs/CrashReporter/, which on shared devices accumulates unrelated reports. - try: - crash_dest = os.path.join(upload_root, f'iteration{i}_crashlogs') - os.makedirs(crash_dest, exist_ok=True) - crashCopyCmd = [ - 'xcrun', 'devicectl', 'device', 'copy', 'from', - '--device', deviceUDID, - '--domain-type', 'systemCrashLogs', - '--source', '/', - '--destination', crash_dest, - ] - getLogger().info(f"Copying device crash logs to {crash_dest} for diagnosis.") - RunCommand(crashCopyCmd, verbose=True).run() - bundle_name = os.path.splitext(os.path.basename(os.path.normpath(self.packagepath)))[0] - iteration_start_ts = runCmdTimestamp.timestamp() - kept = removed = 0 - for root, _, files in os.walk(crash_dest): - for fname in files: - fpath = os.path.join(root, fname) + # Take a final snapshot and download only crash reports that appeared + # since the initial snapshot taken before the iteration loop. This + # matches XHarness's CrashSnapshotReporter pattern and avoids uploading + # the historical backlog of unrelated crashes the shared device retains. + # iOS may take a few seconds to finish writing a crash report after the + # process dies, so poll the snapshot for up to 60s waiting for new + # entries to appear (matches CrashSnapshotReporter.EndCaptureAsync). + if initial_device_crashes is None: + getLogger().info("Skipping device crash log download (initial snapshot unavailable).") + else: + crash_wait_deadline = time.time() + 60 + final_device_crashes = listDeviceCrashReports() + new_crashes = sorted(final_device_crashes - initial_device_crashes) if final_device_crashes is not None else [] + while final_device_crashes is not None and not new_crashes and time.time() < crash_wait_deadline: + time.sleep(1) + final_device_crashes = listDeviceCrashReports() + new_crashes = sorted(final_device_crashes - initial_device_crashes) if final_device_crashes is not None else [] + if final_device_crashes is None: + getLogger().warning("Skipping device crash log download (final snapshot failed).") + elif not new_crashes: + getLogger().info("No new crash reports on device for this test run.") + else: + crash_dest = os.path.join(upload_root, f'iteration{i}_crashlogs') + os.makedirs(crash_dest, exist_ok=True) + getLogger().info(f"Downloading {len(new_crashes)} new crash report(s) to {crash_dest}.") + for crash_id in new_crashes: + dst = os.path.join(crash_dest, os.path.basename(crash_id)) + dlCmd = xharnesscommand() + [ + 'apple', 'mlaunch', '--', + f'--download-crash-report={crash_id}', + f'--download-crash-report-to={dst}', + '--devname', deviceUDID, + ] try: - is_bundle_match = bool(bundle_name) and bundle_name in fname - is_recent = os.path.getmtime(fpath) >= iteration_start_ts - if is_bundle_match or is_recent: - kept += 1 - else: - os.remove(fpath) - removed += 1 - except OSError: - pass - getLogger().info(f"Kept {kept} crash log(s) relevant to {bundle_name!r} or this iteration; pruned {removed}.") - except Exception as crash_ex: - getLogger().warning(f"Failed to copy device crash logs for diagnosis: {crash_ex}") + RunCommand(dlCmd, verbose=True).run() + except Exception as dlEx: + getLogger().warning(f"Failed to download crash report {crash_id}: {dlEx}") raise # Process Data