From 0510b01b6d4fe0424b3bae2353a308054429f910 Mon Sep 17 00:00:00 2001 From: Keith Sheppard Date: Tue, 23 Jun 2026 16:33:14 -0400 Subject: [PATCH] reject orphan identity references at preflight; add --tolerate-orphan-identities --- docs/user-guide/cli-tools.md | 10 +- .../resources/docs/user_guide/cli-tools.md | 10 +- src/jabs/scripts/cli/update_labels.py | 45 ++- src/jabs/scripts/cli/update_pose.py | 187 +++++++++++- tests/project/test_update_labels.py | 68 ++++- tests/project/test_update_pose.py | 277 +++++++++++++++++- 6 files changed, 575 insertions(+), 22 deletions(-) diff --git a/docs/user-guide/cli-tools.md b/docs/user-guide/cli-tools.md index 6799bb6f..94be758f 100644 --- a/docs/user-guide/cli-tools.md +++ b/docs/user-guide/cli-tools.md @@ -188,7 +188,7 @@ The `jabs-cli update-pose` command updates an existing JABS project to use updat **Usage:** ```bash -jabs-cli update-pose [--min-iou-thresh ] [--verbose] [--annotate-failures] [--drop-timeline-annotations] [--skip-feature-gen] +jabs-cli update-pose [--min-iou-thresh ] [--verbose] [--annotate-failures] [--drop-timeline-annotations] [--skip-feature-gen] [--tolerate-orphan-identities] ``` - ``: Path to the JABS project to update in place. @@ -198,9 +198,12 @@ jabs-cli update-pose [--min-iou-thresh ] - `--annotate-failures`: Add timeline annotations to the project for blocks whose label remap fails. - `--drop-timeline-annotations`: Discard existing timeline annotations from the source project instead of copying or remapping them. - `--skip-feature-gen`: Skip automatic feature regeneration after a successful pose update. +- `--tolerate-orphan-identities`: Continue past orphan-identity references in the live project's annotations (warn instead of erroring at preflight). Affected `(identity, behavior)` pairs are skipped during remap; all other labels are remapped normally. By default the command aborts at preflight when orphans are detected. Before modifying the project, the command validates the updated pose files, runs the pose update and label remap in disposable staging projects, and creates a timestamped backup zip under `/.backup`. By default, existing timeline annotations are also carried forward: video-level annotations are copied as-is, and identity-scoped annotations are remapped by the same interval-matching logic used for label blocks. Use `--drop-timeline-annotations` if you want to discard existing timeline annotations instead. Only after the staged pose update succeeds are annotations, project metadata, and pose files copied back into the project. +**Orphan identity check.** Preflight rejects projects whose annotation files reference identity indices that the corresponding pose file no longer supplies — typically a sign the pose was regenerated with fewer identities, leaving stale label entries that can no longer be matched against pose data. The command lists every offending video and identity, then exits without modifying anything. Clean up the affected annotation files (drop the orphan identity entries from each `.json`) and re-run, or pass `--tolerate-orphan-identities` to warn-and-skip the affected entries instead of aborting. + After a successful live pose update, features are regenerated automatically only when `--skip-feature-gen` is not passed and the existing `jabs/project.json` already contains explicit `window_sizes`. If the project file has no `window_sizes` entry, there is nothing to regenerate and feature generation is skipped. If you want more control over feature regeneration, use `--skip-feature-gen` and run `jabs-init` manually, or generate features from the GUI. **Example:** @@ -218,7 +221,7 @@ The `jabs-cli update-labels` command is the inverse of [`update-pose`](#jabs-cli **Usage:** ```bash -jabs-cli update-labels [--min-iou-thresh ] [--verbose] [--annotate-failures] [--drop-timeline-annotations] +jabs-cli update-labels [--min-iou-thresh ] [--verbose] [--annotate-failures] [--drop-timeline-annotations] [--tolerate-orphan-identities] ``` - ``: Path to the target JABS project whose labels will be replaced in place. The target's pose is unchanged. If `` is a directory of videos + pose files with no `jabs/` subdirectory, a minimal JABS project is scaffolded automatically — features are not generated, so you may want to run `jabs-init` separately afterwards. @@ -227,11 +230,14 @@ jabs-cli update-labels [--min-iou-thresh /.backup` covering `jabs/project.json`, annotations, and predictions (pose files are not touched). Labels are processed block by block, matched by median bbox IoU between the source pose and the target's existing pose, and written to the staged destination label track. By default, source timeline annotations are also carried forward and remapped the same way as label blocks; use `--drop-timeline-annotations` to discard them. Existing target labels for videos that the source does not cover are left untouched (per-video replace). Behaviors named in the source's `project.json` but not present in the target are merged into the target's `project.json` so the imported labels are usable in the GUI; behaviors already configured in the target keep their existing settings. +**Orphan identity check.** Preflight rejects the run if any source annotation file references identity indices that the source's own pose file no longer supplies. This typically means the source's pose was regenerated with fewer identities and the affected label entries are stranded — there is no source-side bounding-box data to anchor the IoU match against. The command lists every offending video and identity, then exits without modifying the target. Clean up the source annotation files (drop the orphan identity entries from each `.json`) and re-run, or pass `--tolerate-orphan-identities` to warn-and-skip the affected entries instead of aborting. + The target's pose is unchanged, so the feature cache stays valid and is **not** regenerated. Predictions are cleared because they are stale relative to the new labels; classifiers, the performance cache, and feature files are all left in place. If you need to retrain after a label import, run training from the GUI or via `jabs-classify`. If a failure occurs after the live apply begins, the command prints the backup path plus cleanup and manual restore instructions instead of restoring automatically. diff --git a/src/jabs/resources/docs/user_guide/cli-tools.md b/src/jabs/resources/docs/user_guide/cli-tools.md index 6799bb6f..94be758f 100644 --- a/src/jabs/resources/docs/user_guide/cli-tools.md +++ b/src/jabs/resources/docs/user_guide/cli-tools.md @@ -188,7 +188,7 @@ The `jabs-cli update-pose` command updates an existing JABS project to use updat **Usage:** ```bash -jabs-cli update-pose [--min-iou-thresh ] [--verbose] [--annotate-failures] [--drop-timeline-annotations] [--skip-feature-gen] +jabs-cli update-pose [--min-iou-thresh ] [--verbose] [--annotate-failures] [--drop-timeline-annotations] [--skip-feature-gen] [--tolerate-orphan-identities] ``` - ``: Path to the JABS project to update in place. @@ -198,9 +198,12 @@ jabs-cli update-pose [--min-iou-thresh ] - `--annotate-failures`: Add timeline annotations to the project for blocks whose label remap fails. - `--drop-timeline-annotations`: Discard existing timeline annotations from the source project instead of copying or remapping them. - `--skip-feature-gen`: Skip automatic feature regeneration after a successful pose update. +- `--tolerate-orphan-identities`: Continue past orphan-identity references in the live project's annotations (warn instead of erroring at preflight). Affected `(identity, behavior)` pairs are skipped during remap; all other labels are remapped normally. By default the command aborts at preflight when orphans are detected. Before modifying the project, the command validates the updated pose files, runs the pose update and label remap in disposable staging projects, and creates a timestamped backup zip under `/.backup`. By default, existing timeline annotations are also carried forward: video-level annotations are copied as-is, and identity-scoped annotations are remapped by the same interval-matching logic used for label blocks. Use `--drop-timeline-annotations` if you want to discard existing timeline annotations instead. Only after the staged pose update succeeds are annotations, project metadata, and pose files copied back into the project. +**Orphan identity check.** Preflight rejects projects whose annotation files reference identity indices that the corresponding pose file no longer supplies — typically a sign the pose was regenerated with fewer identities, leaving stale label entries that can no longer be matched against pose data. The command lists every offending video and identity, then exits without modifying anything. Clean up the affected annotation files (drop the orphan identity entries from each `.json`) and re-run, or pass `--tolerate-orphan-identities` to warn-and-skip the affected entries instead of aborting. + After a successful live pose update, features are regenerated automatically only when `--skip-feature-gen` is not passed and the existing `jabs/project.json` already contains explicit `window_sizes`. If the project file has no `window_sizes` entry, there is nothing to regenerate and feature generation is skipped. If you want more control over feature regeneration, use `--skip-feature-gen` and run `jabs-init` manually, or generate features from the GUI. **Example:** @@ -218,7 +221,7 @@ The `jabs-cli update-labels` command is the inverse of [`update-pose`](#jabs-cli **Usage:** ```bash -jabs-cli update-labels [--min-iou-thresh ] [--verbose] [--annotate-failures] [--drop-timeline-annotations] +jabs-cli update-labels [--min-iou-thresh ] [--verbose] [--annotate-failures] [--drop-timeline-annotations] [--tolerate-orphan-identities] ``` - ``: Path to the target JABS project whose labels will be replaced in place. The target's pose is unchanged. If `` is a directory of videos + pose files with no `jabs/` subdirectory, a minimal JABS project is scaffolded automatically — features are not generated, so you may want to run `jabs-init` separately afterwards. @@ -227,11 +230,14 @@ jabs-cli update-labels [--min-iou-thresh /.backup` covering `jabs/project.json`, annotations, and predictions (pose files are not touched). Labels are processed block by block, matched by median bbox IoU between the source pose and the target's existing pose, and written to the staged destination label track. By default, source timeline annotations are also carried forward and remapped the same way as label blocks; use `--drop-timeline-annotations` to discard them. Existing target labels for videos that the source does not cover are left untouched (per-video replace). Behaviors named in the source's `project.json` but not present in the target are merged into the target's `project.json` so the imported labels are usable in the GUI; behaviors already configured in the target keep their existing settings. +**Orphan identity check.** Preflight rejects the run if any source annotation file references identity indices that the source's own pose file no longer supplies. This typically means the source's pose was regenerated with fewer identities and the affected label entries are stranded — there is no source-side bounding-box data to anchor the IoU match against. The command lists every offending video and identity, then exits without modifying the target. Clean up the source annotation files (drop the orphan identity entries from each `.json`) and re-run, or pass `--tolerate-orphan-identities` to warn-and-skip the affected entries instead of aborting. + The target's pose is unchanged, so the feature cache stays valid and is **not** regenerated. Predictions are cleared because they are stale relative to the new labels; classifiers, the performance cache, and feature files are all left in place. If you need to retrain after a label import, run training from the GUI or via `jabs-classify`. If a failure occurs after the live apply begins, the command prints the backup path plus cleanup and manual restore instructions instead of restoring automatically. diff --git a/src/jabs/scripts/cli/update_labels.py b/src/jabs/scripts/cli/update_labels.py index 6d93f81f..17fed7d7 100644 --- a/src/jabs/scripts/cli/update_labels.py +++ b/src/jabs/scripts/cli/update_labels.py @@ -54,6 +54,8 @@ from .update_pose import ( _create_backup_archive, + _handle_orphan_identities, + _orphan_identities_in_annotation, _print_manual_restore, _project_videos, _restore_cleanup_paths, @@ -160,6 +162,8 @@ def _scaffold_target_project_if_missing(target_dir: Path) -> bool: def _preflight_label_update_inputs( target_dir: Path, source_project_dir: Path, + *, + tolerate_orphan_identities: bool = False, ) -> tuple[list[str], set[str]]: """Validate target and source inputs, scaffolding the target if needed. @@ -171,6 +175,15 @@ def _preflight_label_update_inputs( features, or pose changes — so on any subsequent failure the user is left with a harmless no-op directory that the next run reuses. + Args: + target_dir: Path to the target JABS project (or a videos+pose dir to scaffold). + source_project_dir: Path to the source JABS project providing labels. + tolerate_orphan_identities: When ``True``, orphan identity references in + the source's annotations are warned about and remap continues, with + the affected ``(identity, behavior)`` pairs skipped during remap. + When ``False`` (default), the orphan check raises before any backup + or mutation. + Returns: Tuple ``(videos, live_annotation_videos)`` where ``videos`` is the sorted list of source-labeled videos to operate on (each also exists in the @@ -205,10 +218,13 @@ def _preflight_label_update_inputs( f"Source-labeled videos missing from target project: {', '.join(missing_in_target)}" ) + source_annotations_dir = source_project_dir / "jabs" / "annotations" + orphan_issues: list[tuple[str, int, list[int]]] = [] + for video in labeled_videos: source_video_path = source_project_dir / video source_pose_path = get_pose_path(source_video_path) - _validate_pose_file( + _, source_num_identities = _validate_pose_file( video, source_video_path, source_pose_path, @@ -226,6 +242,15 @@ def _preflight_label_update_inputs( require_bboxes=True, ) + annotation_path = source_annotations_dir / Path(video).with_suffix(".json") + orphans = _orphan_identities_in_annotation(annotation_path, source_num_identities) + if orphans: + orphan_issues.append((video, source_num_identities, orphans)) + + _handle_orphan_identities( + orphan_issues, source_label="source", tolerate=tolerate_orphan_identities + ) + target_annotations_dir = target_dir / "jabs" / "annotations" live_annotation_videos: set[str] = set() if target_annotations_dir.exists(): @@ -369,6 +394,7 @@ def update_project_labels_in_place( verbose: bool = False, annotate_failures: bool = False, drop_timeline_annotations: bool = False, + tolerate_orphan_identities: bool = False, ) -> tuple[int, int, Path, list[str]]: """Update a live project in place by replacing its labels from ``source_project_dir``. @@ -384,6 +410,10 @@ def update_project_labels_in_place( annotate_failures: Whether to write timeline annotations for failed block matches. drop_timeline_annotations: Whether to discard source timeline annotations instead of copying or remapping them. + tolerate_orphan_identities: When ``True``, warn about orphan identity + references in the source's annotations and skip the affected + ``(identity, behavior)`` pairs during remap instead of aborting at + preflight. Returns: Tuple ``(total_success, total_skipped, backup_path, newly_added_behaviors)`` @@ -398,6 +428,7 @@ def update_project_labels_in_place( labeled_videos, _live_annotation_videos = _preflight_label_update_inputs( project_dir, source_project_dir, + tolerate_orphan_identities=tolerate_orphan_identities, ) backup_path = _create_backup_archive( project_dir, @@ -501,6 +532,16 @@ def update_project_labels_in_place( is_flag=True, help="Discard source timeline annotations instead of copying or remapping them.", ) +@click.option( + "--tolerate-orphan-identities", + is_flag=True, + help=( + "Continue past orphan identity references in the source's annotation " + "files (warn instead of erroring at preflight). The affected " + "(identity, behavior) pairs are skipped during remap; all other labels " + "are imported normally. By default, the command aborts at preflight." + ), +) def update_labels_command( project: Path, source_project: Path, @@ -508,6 +549,7 @@ def update_labels_command( verbose: bool, annotate_failures: bool, drop_timeline_annotations: bool, + tolerate_orphan_identities: bool, ) -> None: """Update a JABS project in place by replacing labels imported from another project.""" total_success, total_skipped, backup_path, newly_added_behaviors = ( @@ -518,6 +560,7 @@ def update_labels_command( verbose=verbose, annotate_failures=annotate_failures, drop_timeline_annotations=drop_timeline_annotations, + tolerate_orphan_identities=tolerate_orphan_identities, ) ) diff --git a/src/jabs/scripts/cli/update_pose.py b/src/jabs/scripts/cli/update_pose.py index 478dfdb3..2e409187 100644 --- a/src/jabs/scripts/cli/update_pose.py +++ b/src/jabs/scripts/cli/update_pose.py @@ -383,6 +383,24 @@ def _remap_labels_for_video( ) continue + # Source's annotation references an identity its own pose can no longer + # supply. Under normal strict operation this is caught earlier by + # ``_handle_orphan_identities`` in preflight; this inline guard handles + # the ``--tolerate-orphan-identities`` mode where preflight only warns. + # All blocks for this (identity, behavior) pair are counted as skipped + # with a single per-pair warning; no failure annotation is written + # because the root cause is in the source project. + if src_identity >= source_pose.num_identities: + orphan_block_count = sum(1 for _ in track_labels.get_blocks()) + print( + f"WARNING: {video} src_id={src_identity} not present in source pose " + f"(only {source_pose.num_identities} identities); skipping " + f"{orphan_block_count} block(s) for behavior={behavior}.", + file=sys.stderr, + ) + skipped_count += orphan_block_count + continue + for block in track_labels.get_blocks(): start, end, present = block["start"], block["end"], block["present"] dst_identity, iou = _find_best_identity( @@ -452,8 +470,12 @@ def _validate_pose_file( pose_path: Path, role: str, require_bboxes: bool, -) -> int: - """Validate that a pose file is readable and matches the video frame count.""" +) -> tuple[int, int]: + """Validate that a pose file is readable and matches the video frame count. + + Returns: + Tuple of ``(format_major_version, num_identities)``. + """ pose = open_pose_file(pose_path, None) version = getattr(pose, "format_major_version", 0) if require_bboxes and (version < 8 or not getattr(pose, "has_bounding_boxes", False)): @@ -466,7 +488,122 @@ def _validate_pose_file( f"{video} {role} pose frame count ({pose_frames}) does not match video ({video_frames})" ) - return int(version) + return int(version), int(pose.num_identities) + + +def _orphan_identities_in_annotation( + annotation_path: Path, + num_identities: int, +) -> list[int]: + """Return identity indices referenced in the annotation file that are orphaned. + + An identity index is orphaned when it is ``>= num_identities`` — i.e. the + pose file does not have a corresponding row of pose data for it. + + Inspects both per-identity label tracks (the ``labels``/``unfragmented_labels`` + sections) and identity-scoped timeline annotations (the optional ``identity`` + field on each entry of ``annotations``). Non-integer or null identity values + are ignored — they refer to video-level annotations or are otherwise out of + scope for this check. + + Args: + annotation_path: Path to a JABS annotation JSON file. Returns an empty + list if the file does not exist or cannot be parsed. + num_identities: The corresponding pose file's ``num_identities``. Any + identity index ``>= num_identities`` is reported as orphaned. + + Returns: + Sorted, deduplicated list of orphan identity indices. + """ + try: + with annotation_path.open() as f: + data = json.load(f) + except (FileNotFoundError, json.JSONDecodeError): + return [] + + orphans: set[int] = set() + + label_key = "unfragmented_labels" if "unfragmented_labels" in data else "labels" + for identity_key in data.get(label_key, {}): + try: + identity_index = int(identity_key) + except (TypeError, ValueError): + continue + if identity_index >= num_identities: + orphans.add(identity_index) + + for annotation in data.get("annotations", []): + identity = annotation.get("identity") + if identity is None: + continue + try: + identity_index = int(identity) + except (TypeError, ValueError): + continue + if identity_index >= num_identities: + orphans.add(identity_index) + + return sorted(orphans) + + +def _handle_orphan_identities( + issues: list[tuple[str, int, list[int]]], + *, + source_label: str = "source", + tolerate: bool = False, +) -> None: + """Report orphan-identity issues, either by raising or by warning to stderr. + + Args: + issues: List of ``(video, pose_num_identities, orphan_identity_indices)`` + tuples accumulated during preflight. Empty list is a no-op. + source_label: Human label for the side that owns the broken data + (e.g. ``"source"`` or ``"live"``). Used in the message. + tolerate: When ``True``, write a consolidated warning to stderr and let + the caller proceed. The remap loop's inline guard then skips any + affected ``(identity, behavior)`` pairs as they are encountered. + When ``False`` (default), raise a ``ValueError`` so the run aborts + before any backup or mutation. + """ + if not issues: + return + + intro = ( + f"{source_label.capitalize()} project has orphan identity references " + "that cannot be matched against pose data:" + ) + lines = [intro] + for video, num_identities, orphans in issues: + orphans_str = ", ".join(str(i) for i in orphans) + lines.append( + f" {video}: {source_label} pose has {num_identities} identities " + f"(valid indices 0 to {num_identities - 1}) but annotation references " + f"identity {orphans_str}" + ) + lines.append("") + lines.append( + "These identity indices have no corresponding entries in the pose file. " + "Typically the pose was regenerated with fewer identities, leaving stale " + "label entries that can no longer be matched against pose data." + ) + + if tolerate: + lines.append("") + lines.append( + "Proceeding because --tolerate-orphan-identities is set. The affected " + "(identity, behavior) pairs will be skipped; all other labels are " + "remapped normally." + ) + print("WARNING: " + "\n".join(lines), file=sys.stderr) + return + + lines.append("") + lines.append( + f"Clean up the {source_label} annotation files (drop the orphan identity " + "entries from the affected .json files) and re-run, or pass " + "--tolerate-orphan-identities to skip the affected entries instead." + ) + raise ValueError("\n".join(lines)) def _validate_live_update_targets( @@ -529,8 +666,20 @@ def _validate_live_update_targets( def _preflight_update_inputs( project_dir: Path, new_pose_dir: Path, + *, + tolerate_orphan_identities: bool = False, ) -> tuple[list[str], dict[str, Path], set[str]]: - """Validate live and replacement inputs before any live mutation occurs.""" + """Validate live and replacement inputs before any live mutation occurs. + + Args: + project_dir: Live project directory. + new_pose_dir: Directory containing replacement pose files. + tolerate_orphan_identities: When ``True``, orphan identity references in + the live project's annotation files are warned about and remap + continues, with the affected ``(identity, behavior)`` pairs skipped + during remap. When ``False`` (default), the orphan check raises + before any backup or mutation. + """ if not Project.is_valid_project_directory(project_dir): raise ValueError(f"{project_dir} is not a valid JABS project directory") if not new_pose_dir.is_dir(): @@ -544,20 +693,21 @@ def _preflight_update_inputs( live_annotations: set[str] = set() annotations_dir = project_dir / "jabs" / "annotations" replacement_version: int | None = None + orphan_issues: list[tuple[str, int, list[int]]] = [] for video in videos: video_path = project_dir / video live_pose_path = get_pose_path(video_path) replacement_pose_path = get_pose_path(video_path, new_pose_dir) - _validate_pose_file( + _, live_num_identities = _validate_pose_file( video, video_path, live_pose_path, "source", require_bboxes=True, ) - video_replacement_version = _validate_pose_file( + video_replacement_version, _ = _validate_pose_file( video, video_path, replacement_pose_path, @@ -577,6 +727,13 @@ def _preflight_update_inputs( annotation_path = annotations_dir / Path(video).with_suffix(".json") if annotation_path.exists(): live_annotations.add(video) + orphans = _orphan_identities_in_annotation(annotation_path, live_num_identities) + if orphans: + orphan_issues.append((video, live_num_identities, orphans)) + + _handle_orphan_identities( + orphan_issues, source_label="live", tolerate=tolerate_orphan_identities + ) _validate_live_update_targets(project_dir, videos, live_annotations) @@ -990,6 +1147,7 @@ def update_project_pose_in_place( annotate_failures: bool = False, drop_timeline_annotations: bool = False, skip_feature_gen: bool = False, + tolerate_orphan_identities: bool = False, ) -> tuple[int, int, Path, str]: """Update a live project in place using replacement pose files from ``new_pose_dir``. @@ -1002,6 +1160,10 @@ def update_project_pose_in_place( drop_timeline_annotations: Whether to discard existing source timeline annotations instead of copying or remapping them. skip_feature_gen: Whether to skip feature generation after pose update. + tolerate_orphan_identities: When ``True``, warn about orphan identity + references in the live project's annotations and skip the affected + ``(identity, behavior)`` pairs during remap instead of aborting at + preflight. Returns: Tuple of ``(total_success, total_skipped, backup_path, feature_regen_status)`` for @@ -1014,6 +1176,7 @@ def update_project_pose_in_place( videos, replacement_pose_files, live_annotation_videos = _preflight_update_inputs( project_dir, new_pose_dir, + tolerate_orphan_identities=tolerate_orphan_identities, ) backup_path = _create_backup_archive(project_dir, videos) @@ -1125,6 +1288,16 @@ def update_project_pose_in_place( "contains explicit window_sizes." ), ) +@click.option( + "--tolerate-orphan-identities", + is_flag=True, + help=( + "Continue past orphan identity references in the live project's " + "annotation files (warn instead of erroring at preflight). The affected " + "(identity, behavior) pairs are skipped during remap; all other labels " + "are remapped normally. By default, the command aborts at preflight." + ), +) def update_pose_command( project: Path, new_pose_dir: Path, @@ -1133,6 +1306,7 @@ def update_pose_command( annotate_failures: bool, drop_timeline_annotations: bool, skip_feature_gen: bool, + tolerate_orphan_identities: bool, ) -> None: """Update a JABS project in place to use updated pose files while remapping labels.""" total_success, total_skipped, backup_path, feature_regen_status = update_project_pose_in_place( @@ -1143,6 +1317,7 @@ def update_pose_command( annotate_failures=annotate_failures, drop_timeline_annotations=drop_timeline_annotations, skip_feature_gen=skip_feature_gen, + tolerate_orphan_identities=tolerate_orphan_identities, ) click.echo(f"Backup archive: {backup_path}") diff --git a/tests/project/test_update_labels.py b/tests/project/test_update_labels.py index 1c8a2fdd..e6211a90 100644 --- a/tests/project/test_update_labels.py +++ b/tests/project/test_update_labels.py @@ -30,7 +30,12 @@ def _make_valid_project_dir( return project_dir -def _stub_pose_reader(monkeypatch, num_frames: int = 10, version: int = 8) -> None: +def _stub_pose_reader( + monkeypatch, + num_frames: int = 10, + version: int = 8, + num_identities: int = 2, +) -> None: """Replace pose-file readers used by the preflight with a permissive stub.""" monkeypatch.setattr( update_pose, @@ -39,6 +44,7 @@ def _stub_pose_reader(monkeypatch, num_frames: int = 10, version: int = 8) -> No format_major_version=version, has_bounding_boxes=True, num_frames=num_frames, + num_identities=num_identities, ), ) monkeypatch.setattr( @@ -162,6 +168,58 @@ def test_preflight_returns_labeled_videos_and_live_annotation_set(tmp_path, monk assert live_annotation_videos == {"video1.avi"} +def test_preflight_raises_on_orphan_source_identities(tmp_path, monkeypatch): + """Preflight must abort before any mutation when source labels reference orphans.""" + target_dir = tmp_path / "target" + source_dir = tmp_path / "source" + _make_valid_project_dir(target_dir, write_annotation=True) + _make_valid_project_dir(source_dir, write_annotation=False) + # Write a source annotation referencing identity 2 against a source pose that + # only advertises 2 identities (indices 0 and 1). + (source_dir / "jabs" / "annotations" / "video1.json").write_text( + json.dumps( + { + "file": "video1.avi", + "num_frames": 10, + "unfragmented_labels": {"0": {"Grooming": []}, "2": {"Grooming": []}}, + "labels": {}, + } + ) + ) + _stub_pose_reader(monkeypatch, num_identities=2) + + with pytest.raises(ValueError, match=r"video1\.avi.*identity 2"): + update_labels._preflight_label_update_inputs(target_dir, source_dir) + + +def test_preflight_tolerates_orphan_source_identities_when_flag_set(tmp_path, monkeypatch, capsys): + """``tolerate_orphan_identities=True`` lets preflight complete with a warning.""" + target_dir = tmp_path / "target" + source_dir = tmp_path / "source" + _make_valid_project_dir(target_dir, write_annotation=True) + _make_valid_project_dir(source_dir, write_annotation=False) + (source_dir / "jabs" / "annotations" / "video1.json").write_text( + json.dumps( + { + "file": "video1.avi", + "num_frames": 10, + "unfragmented_labels": {"0": {"Grooming": []}, "2": {"Grooming": []}}, + "labels": {}, + } + ) + ) + _stub_pose_reader(monkeypatch, num_identities=2) + + videos, _ = update_labels._preflight_label_update_inputs( + target_dir, source_dir, tolerate_orphan_identities=True + ) + + assert videos == ["video1.avi"] + stderr = capsys.readouterr().err + assert "WARNING" in stderr + assert "identity 2" in stderr + + def test_preflight_rejects_empty_target_dir(tmp_path): """The preflight should fail when the target dir has no videos and no jabs/ to scaffold from.""" target_dir = tmp_path / "target" @@ -546,6 +604,7 @@ def fake_update_project_labels_in_place( verbose, annotate_failures, drop_timeline_annotations, + tolerate_orphan_identities, ): captured.update( { @@ -555,6 +614,7 @@ def fake_update_project_labels_in_place( "verbose": verbose, "annotate_failures": annotate_failures, "drop_timeline_annotations": drop_timeline_annotations, + "tolerate_orphan_identities": tolerate_orphan_identities, } ) return 4, 2, project_dir / ".backup" / "update_labels_test.zip", ["Climbing"] @@ -577,6 +637,7 @@ def fake_update_project_labels_in_place( "--verbose", "--annotate-failures", "--drop-timeline-annotations", + "--tolerate-orphan-identities", ], ) @@ -588,6 +649,7 @@ def fake_update_project_labels_in_place( "verbose": True, "annotate_failures": True, "drop_timeline_annotations": True, + "tolerate_orphan_identities": True, } assert "Backup archive:" in result.output assert "Label update summary: 4 label blocks assigned, 2 label blocks skipped" in result.output @@ -634,7 +696,7 @@ def test_update_project_labels_in_place_invokes_pipeline(tmp_path, monkeypatch): monkeypatch.setattr( update_labels, "_preflight_label_update_inputs", - lambda *_args: (sequence.append("preflight") or (["video1.avi"], set())), + lambda *_args, **_kwargs: (sequence.append("preflight") or (["video1.avi"], set())), ) def fake_create_backup_archive(project_dir_arg, videos_arg, *, include_pose_files, prefix): @@ -702,7 +764,7 @@ def test_update_project_labels_in_place_passes_labeled_videos_and_description_ph monkeypatch.setattr( update_labels, "_preflight_label_update_inputs", - lambda *_args: (["video1.avi"], set()), + lambda *_args, **_kwargs: (["video1.avi"], set()), ) monkeypatch.setattr( update_labels, diff --git a/tests/project/test_update_pose.py b/tests/project/test_update_pose.py index 19f2f76b..0158454c 100644 --- a/tests/project/test_update_pose.py +++ b/tests/project/test_update_pose.py @@ -15,14 +15,22 @@ from jabs.scripts.cli.cli import cli -def _make_pose(identities, boxes_by_identity): - """Build a simple bbox-capable pose object for remap tests.""" +def _make_pose(identities, boxes_by_identity, *, num_identities=None): + """Build a simple bbox-capable pose object for remap tests. + + ``num_identities`` defaults to ``len(identities)`` (the consistent case). Pass an + explicit value to simulate a pose file whose total identity count diverges from + the iterable returned by ``.identities`` — e.g. annotations that reference an + identity index the pose can no longer supply. + """ num_frames = next(iter(boxes_by_identity.values())).shape[0] + identity_list = list(identities) return SimpleNamespace( format_major_version=8, has_bounding_boxes=True, num_frames=num_frames, - identities=list(identities), + num_identities=num_identities if num_identities is not None else len(identity_list), + identities=identity_list, get_bounding_boxes=lambda identity: boxes_by_identity[identity], identity_index_to_display=lambda identity: f"id-{identity}", ) @@ -71,7 +79,9 @@ def test_preflight_selects_only_latest_replacement_pose(tmp_path, monkeypatch): (new_pose_dir / "video1_pose_est_v7.h5").touch() def fake_open_pose_file(path, _cache_dir): - return SimpleNamespace(format_major_version=8, has_bounding_boxes=True, num_frames=10) + return SimpleNamespace( + format_major_version=8, has_bounding_boxes=True, num_frames=10, num_identities=2 + ) monkeypatch.setattr(update_pose, "open_pose_file", fake_open_pose_file) monkeypatch.setattr( @@ -106,7 +116,9 @@ def test_preflight_rejects_nonwritable_live_annotation_file(tmp_path, monkeypatc (new_pose_dir / "video1_pose_est_v8.h5").touch() def fake_open_pose_file(path, _cache_dir): - return SimpleNamespace(format_major_version=8, has_bounding_boxes=True, num_frames=10) + return SimpleNamespace( + format_major_version=8, has_bounding_boxes=True, num_frames=10, num_identities=2 + ) def fake_access(path, mode): return not (Path(path) == annotation_path and mode & os.W_OK) @@ -139,7 +151,9 @@ def test_preflight_rejects_nonwritable_annotations_directory(tmp_path, monkeypat (new_pose_dir / "video1_pose_est_v8.h5").touch() def fake_open_pose_file(path, _cache_dir): - return SimpleNamespace(format_major_version=8, has_bounding_boxes=True, num_frames=10) + return SimpleNamespace( + format_major_version=8, has_bounding_boxes=True, num_frames=10, num_identities=2 + ) def fake_access(path, mode): return not (Path(path) == annotations_dir and mode & os.W_OK) @@ -185,7 +199,9 @@ def test_preflight_allows_timeline_annotations(tmp_path, monkeypatch): ) def fake_open_pose_file(path, _cache_dir): - return SimpleNamespace(format_major_version=8, has_bounding_boxes=True, num_frames=10) + return SimpleNamespace( + format_major_version=8, has_bounding_boxes=True, num_frames=10, num_identities=2 + ) monkeypatch.setattr(update_pose, "open_pose_file", fake_open_pose_file) monkeypatch.setattr( @@ -221,6 +237,7 @@ def fake_update_project_pose_in_place( annotate_failures, drop_timeline_annotations, skip_feature_gen, + tolerate_orphan_identities, ): captured.update( { @@ -231,6 +248,7 @@ def fake_update_project_pose_in_place( "annotate_failures": annotate_failures, "drop_timeline_annotations": drop_timeline_annotations, "skip_feature_gen": skip_feature_gen, + "tolerate_orphan_identities": tolerate_orphan_identities, } ) return 3, 1, project_dir / ".backup" / "update_pose_test.zip", "skipped_by_option" @@ -254,6 +272,7 @@ def fake_update_project_pose_in_place( "--annotate-failures", "--drop-timeline-annotations", "--skip-feature-gen", + "--tolerate-orphan-identities", ], ) @@ -266,6 +285,7 @@ def fake_update_project_pose_in_place( "annotate_failures": True, "drop_timeline_annotations": True, "skip_feature_gen": True, + "tolerate_orphan_identities": True, } assert "Backup archive:" in result.output assert "Pose update summary: 3 label blocks assigned, 1 label blocks skipped" in result.output @@ -465,7 +485,7 @@ def test_update_project_pose_in_place_uses_preexisting_window_sizes_for_feature_ monkeypatch.setattr( update_pose, "_preflight_update_inputs", - lambda *_args: ( + lambda *_args, **_kwargs: ( ["video1.avi"], {"video1.avi": new_pose_dir / "video1_pose_est_v8.h5"}, set(), @@ -869,6 +889,247 @@ def test_remap_labels_for_video_failure_tag_fits_within_max_tag_len(): assert "label remap failed during label update" in annotations[0]["description"] +def test_orphan_identities_in_annotation_detects_label_track_keys(tmp_path): + """Identity keys in labels/unfragmented_labels above num_identities are reported.""" + annotation_path = tmp_path / "video1.json" + annotation_path.write_text( + json.dumps( + { + "file": "video1.avi", + "num_frames": 10, + "unfragmented_labels": { + "0": {"Grooming": []}, + "2": {"Grooming": []}, + "3": {"Grooming": []}, + }, + "labels": {}, + } + ) + ) + + assert update_pose._orphan_identities_in_annotation(annotation_path, 2) == [2, 3] + + +def test_orphan_identities_in_annotation_detects_timeline_annotation_identity(tmp_path): + """Identity-scoped timeline annotation entries are also checked.""" + annotation_path = tmp_path / "video1.json" + annotation_path.write_text( + json.dumps( + { + "file": "video1.avi", + "num_frames": 10, + "labels": {}, + "annotations": [ + {"start": 1, "end": 2, "tag": "note", "color": "#fff"}, + {"start": 3, "end": 4, "tag": "n", "color": "#fff", "identity": 0}, + {"start": 5, "end": 6, "tag": "n", "color": "#fff", "identity": 5}, + ], + } + ) + ) + + assert update_pose._orphan_identities_in_annotation(annotation_path, 2) == [5] + + +def test_orphan_identities_in_annotation_handles_clean_files(tmp_path): + """A consistent annotation file yields an empty list.""" + annotation_path = tmp_path / "video1.json" + annotation_path.write_text( + json.dumps( + { + "file": "video1.avi", + "num_frames": 10, + "unfragmented_labels": {"0": {"Grooming": []}, "1": {"Grooming": []}}, + "labels": {}, + } + ) + ) + + assert update_pose._orphan_identities_in_annotation(annotation_path, 2) == [] + + +def test_orphan_identities_in_annotation_missing_file_returns_empty(tmp_path): + """A missing annotation file is treated as nothing to check rather than an error.""" + assert update_pose._orphan_identities_in_annotation(tmp_path / "nope.json", 2) == [] + + +def test_handle_orphan_identities_raises_by_default_listing_every_offender(): + """The composite error must list every video and identity, not just the first.""" + issues = [ + ("video1.avi", 2, [2, 3]), + ("video2.avi", 3, [4]), + ] + + with pytest.raises(ValueError) as excinfo: + update_pose._handle_orphan_identities(issues, source_label="source") + + msg = str(excinfo.value) + assert "video1.avi" in msg + assert "video2.avi" in msg + assert "identity 2, 3" in msg + assert "identity 4" in msg + # Should describe the remediation, not just the problem. + assert "Clean up" in msg or "clean up" in msg + # Should mention the escape-hatch flag so users discover it from the error. + assert "--tolerate-orphan-identities" in msg + + +def test_handle_orphan_identities_warns_when_tolerated(capsys): + """With ``tolerate=True``, issues are reported via stderr and execution continues.""" + issues = [("video1.avi", 2, [2, 3])] + + update_pose._handle_orphan_identities(issues, source_label="source", tolerate=True) + + stderr = capsys.readouterr().err + assert "WARNING" in stderr + assert "video1.avi" in stderr + assert "identity 2, 3" in stderr + assert "--tolerate-orphan-identities" in stderr + + +def test_handle_orphan_identities_noop_when_empty(): + """No exception, no output, regardless of tolerate.""" + update_pose._handle_orphan_identities([]) + update_pose._handle_orphan_identities([], tolerate=True) + + +def test_preflight_update_inputs_raises_on_orphan_source_identities(tmp_path, monkeypatch): + """Preflight must abort before any mutation when source labels reference orphans.""" + project_dir = tmp_path / "project" + new_pose_dir = tmp_path / "new_pose" + annotations_dir = project_dir / "jabs" / "annotations" + annotations_dir.mkdir(parents=True) + new_pose_dir.mkdir() + + (project_dir / "jabs" / "project.json").write_text("{}") + (project_dir / "video1.avi").touch() + (project_dir / "video1_pose_est_v8.h5").touch() + (new_pose_dir / "video1_pose_est_v8.h5").touch() + (annotations_dir / "video1.json").write_text( + json.dumps( + { + "file": "video1.avi", + "num_frames": 10, + "unfragmented_labels": {"0": {"Grooming": []}, "2": {"Grooming": []}}, + "labels": {}, + } + ) + ) + + def fake_open_pose_file(path, _cache_dir): + return SimpleNamespace( + format_major_version=8, has_bounding_boxes=True, num_frames=10, num_identities=2 + ) + + monkeypatch.setattr(update_pose, "open_pose_file", fake_open_pose_file) + monkeypatch.setattr( + update_pose.VideoReader, + "get_nframes_from_file", + staticmethod(lambda _path: 10), + ) + + with pytest.raises(ValueError, match=r"video1\.avi.*identity 2"): + update_pose._preflight_update_inputs(project_dir, new_pose_dir) + + +def test_preflight_update_inputs_tolerates_orphans_and_warns(tmp_path, monkeypatch, capsys): + """``tolerate_orphan_identities=True`` swaps the preflight error for a warning.""" + project_dir = tmp_path / "project" + new_pose_dir = tmp_path / "new_pose" + annotations_dir = project_dir / "jabs" / "annotations" + annotations_dir.mkdir(parents=True) + new_pose_dir.mkdir() + + (project_dir / "jabs" / "project.json").write_text("{}") + (project_dir / "video1.avi").touch() + (project_dir / "video1_pose_est_v8.h5").touch() + (new_pose_dir / "video1_pose_est_v8.h5").touch() + (annotations_dir / "video1.json").write_text( + json.dumps( + { + "file": "video1.avi", + "num_frames": 10, + "unfragmented_labels": {"0": {"Grooming": []}, "2": {"Grooming": []}}, + "labels": {}, + } + ) + ) + + def fake_open_pose_file(path, _cache_dir): + return SimpleNamespace( + format_major_version=8, has_bounding_boxes=True, num_frames=10, num_identities=2 + ) + + monkeypatch.setattr(update_pose, "open_pose_file", fake_open_pose_file) + monkeypatch.setattr( + update_pose.VideoReader, + "get_nframes_from_file", + staticmethod(lambda _path: 10), + ) + + videos, _, live_annotations = update_pose._preflight_update_inputs( + project_dir, new_pose_dir, tolerate_orphan_identities=True + ) + + assert videos == ["video1.avi"] + assert "video1.avi" in live_annotations + + stderr = capsys.readouterr().err + assert "WARNING" in stderr + assert "identity 2" in stderr + + +def test_remap_labels_for_video_inline_guard_skips_orphan_blocks(capsys): + """When preflight is tolerated, the inline guard skips orphan (identity, behavior) pairs. + + Without the guard, ``_find_best_identity`` would crash on + ``_bboxes_for_identity(src_pose, src_identity)`` for any out-of-range source + identity. The guard preserves all valid identities' blocks and reports a + single warning per skipped pair. + """ + src_boxes = np.array([[[0.0, 0.0], [10.0, 10.0]]] * 10) + dst_boxes = np.array([[[0.0, 0.0], [10.0, 10.0]]] * 10) + + source_pose = _make_pose([0, 1], {0: src_boxes, 1: src_boxes}, num_identities=2) + dest_pose = _make_pose([0, 1, 2], {0: dst_boxes, 1: dst_boxes, 2: dst_boxes}) + + source_labels = VideoLabels("video1.avi", 10) + source_labels.get_track_labels("0", "Grooming").label_behavior(1, 2) + orphan_track = source_labels.get_track_labels("2", "Grooming") + orphan_track.label_behavior(3, 4) + orphan_track.label_not_behavior(5, 6) + + label_source_project = MagicMock() + label_source_project.video_manager.video_path.return_value = Path("video1.avi") + label_source_project.video_manager.load_video_labels.return_value = source_labels + label_source_project.load_pose_est.return_value = source_pose + + label_dest_project = MagicMock() + label_dest_project.video_manager.video_path.return_value = Path("video1.avi") + label_dest_project.load_pose_est.return_value = dest_pose + label_dest_project.project_paths.annotations_dir = Path("/tmp/stage-annotations") + + success_count, skipped_count = update_pose._remap_labels_for_video( + "video1.avi", + label_source_project, + label_dest_project, + min_iou=0.5, + annotate_failures=True, + ) + + assert success_count == 1 + assert skipped_count == 2 + + stderr = capsys.readouterr().err + assert "src_id=2 not present in source pose" in stderr + assert "behavior=Grooming" in stderr + + # No failure annotation is written for the orphan blocks even with + # --annotate-failures, since the root cause is in source. + saved_labels = label_dest_project.save_annotations.call_args[0][0] + assert saved_labels.timeline_annotations.serialize() == [] + + def test_apply_live_update_replaces_pose_set_and_clears_derived_files(tmp_path): """Applying a staged pose update should replace annotations/project metadata and swap the pose set.""" project_dir = tmp_path / "project"