From cbae356b40e0fb371081df61117a066f5652c26f Mon Sep 17 00:00:00 2001 From: "Enrique G. Ortiz" Date: Sat, 14 Feb 2026 12:28:52 -0500 Subject: [PATCH 01/38] chore: archive v1.1 milestone Archive Deployment, Workflow & Competitive Parity milestone: - 7 phases (8-14), 20 plans, 26/26 requirements complete - Roadmap and requirements archived to .planning/milestones/ - PROJECT.md evolved with validated v1.1 requirements - ROADMAP.md collapsed v1.1 into details tag Co-Authored-By: Claude Opus 4.6 --- .planning/MILESTONES.md | 28 ++++ .planning/PROJECT.md | 109 +++++-------- .planning/ROADMAP.md | 143 +++--------------- .planning/STATE.md | 109 +++---------- .../v1.1-REQUIREMENTS.md} | 57 +------ .planning/milestones/v1.1-ROADMAP.md | 131 ++++++++++++++++ 6 files changed, 241 insertions(+), 336 deletions(-) rename .planning/{REQUIREMENTS.md => milestones/v1.1-REQUIREMENTS.md} (68%) create mode 100644 .planning/milestones/v1.1-ROADMAP.md diff --git a/.planning/MILESTONES.md b/.planning/MILESTONES.md index 23c5086..f4545bd 100644 --- a/.planning/MILESTONES.md +++ b/.planning/MILESTONES.md @@ -1,5 +1,33 @@ # Project Milestones: DataVisor +## v1.1 Deployment, Workflow & Competitive Parity (Shipped: 2026-02-13) + +**Delivered:** Production-ready Docker deployment, smart dataset ingestion UI, annotation editing, error triage workflows, interactive visualizations with grid filtering, keyboard shortcuts, and per-annotation TP/FP/FN classification. 
+ +**Phases completed:** 8-14 (20 plans total) + +**Key accomplishments:** + +- Production-ready Docker stack (Caddy + FastAPI + Next.js) with single-user auth, GCP deployment scripts, and comprehensive documentation +- Smart dataset ingestion wizard with auto-detection of COCO layouts (Roboflow/Standard/Flat) and multi-split support +- Annotation editing via react-konva canvas (move, resize, draw, delete bounding boxes) with DuckDB persistence +- Error triage workflow: per-sample tagging, per-annotation TP/FP/FN auto-classification via IoU matching, worst-images ranking, and highlight mode +- Interactive data discovery: clickable confusion matrix, near-duplicate detection, histogram filtering, and find-similar — all piping results to the grid +- Full keyboard navigation with 16 shortcuts across grid, modal, triage, and editing contexts + +**Stats:** + +- 171 files created/modified +- ~19,460 lines of code added (9,306 Python + 10,154 TypeScript) +- 7 phases, 20 plans, 97 commits +- 2 days (Feb 12-13, 2026) + +**Git range:** `a83d6cf` → `1bed6cf` + +**What's next:** Format expansion (YOLO/VOC), PR curves, per-class AP metrics + +--- + ## v1.0 MVP (Shipped: 2026-02-12) **Delivered:** A unified CV dataset introspection tool with visual browsing, annotation overlays, model comparison, embedding visualization, error analysis, and AI-powered pattern detection. diff --git a/.planning/PROJECT.md b/.planning/PROJECT.md index 61ab957..b7f7f13 100644 --- a/.planning/PROJECT.md +++ b/.planning/PROJECT.md @@ -8,80 +8,45 @@ DataVisor is an open-source dataset introspection tool for computer vision — a A single tool that replaces scattered one-off scripts: load any CV dataset, visually browse with annotation overlays, compare ground truth against predictions, cluster via embeddings, and surface mistakes — all in one workflow. 
+## Current State + +**Shipped:** v1.1 (2026-02-13) +**Codebase:** ~32K LOC (16,256 Python + 15,924 TypeScript) across 14 phases +**Architecture:** FastAPI + DuckDB + Qdrant (backend), Next.js + Tailwind + deck.gl + Recharts (frontend), Pydantic AI (agents), Moondream2 (VLM) + ## Requirements ### Validated -- ✓ Multi-format ingestion (COCO) with streaming parser architecture — v1.0 -- ✓ DuckDB-backed metadata storage for fast analytical queries over 100K+ samples — v1.0 -- ✓ Virtualized infinite-scroll grid view with overlaid bounding box annotations — v1.0 -- ✓ Ground Truth vs Model Predictions comparison toggle (solid vs dashed lines) — v1.0 -- ✓ Deterministic class-to-color hashing (same class = same color across sessions) — v1.0 -- ✓ t-SNE embedding generation from images (DINOv2-base) — v1.0 -- ✓ deck.gl-powered 2D embedding scatterplot with zoom, pan, and lasso selection — v1.0 -- ✓ Lasso-to-grid filtering (select cluster points → filter grid to those images) — v1.0 -- ✓ Hover thumbnails on embedding map points — v1.0 -- ✓ Qdrant vector storage for embedding similarity search — v1.0 -- ✓ Error categorization: Hard False Positives, Label Errors, False Negatives — v1.0 -- ✓ Pydantic AI agent that monitors error distribution and recommends actions — v1.0 -- ✓ Pattern detection (e.g., "90% of False Negatives occur in low-light images") — v1.0 -- ✓ Import pre-computed predictions (JSON) — v1.0 -- ✓ BasePlugin class for Python extensibility — v1.0 -- ✓ Local disk and GCS image source support — v1.0 -- ✓ Dynamic metadata filtering (sidebar filters on any metadata field) — v1.0 -- ✓ VLM auto-tagging (Moondream2) for scene attribute tags — v1.0 -- ✓ Search by filename and sort by metadata — v1.0 -- ✓ Save and load filter configurations (saved views) — v1.0 -- ✓ Add/remove tags (individual + bulk) — v1.0 -- ✓ Sample detail modal with full-resolution image — v1.0 -- ✓ Dataset statistics dashboard (class distribution, annotation counts) — v1.0 - -### Active - -- [ ] 
Dockerized deployment with single-user auth for secure cloud VM access -- [ ] GCP deployment script + local run script with setup instructions -- [ ] Smart dataset ingestion UI (point at folder → auto-detect train/val/test splits → import) -- [ ] Annotation editing in the UI (move, resize, delete bounding boxes — depth TBD) -- [ ] Error triage workflow (tag FP/TP/FN/mistake, highlight errors, dim non-errors) -- [ ] Smart "worst images" ranking (combined score: errors + confidence + uniqueness) -- [ ] Keyboard shortcuts for navigation -- [ ] Competitive feature parity with FiftyOne/Encord (gaps TBD after research) +- Streaming COCO ingestion with ijson at 100K+ scale, local + GCS sources — v1.0 +- DuckDB metadata storage with fast analytical queries — v1.0 +- Virtualized grid with SVG annotation overlays, deterministic color hashing — v1.0 +- GT vs Predictions comparison toggle — v1.0 +- t-SNE embeddings with deck.gl scatter plot, lasso-to-grid filtering — v1.0 +- Error categorization (TP/FP/FN/Label Error) + Qdrant similarity search — v1.0 +- Pydantic AI agent for error patterns + Moondream2 VLM auto-tagging — v1.0 +- Metadata filtering, search, saved views, bulk tagging — v1.0 +- Docker 3-service stack with Caddy auth, GCP deployment scripts — v1.1 +- Smart ingestion UI with auto-detection of COCO layouts and multi-split support — v1.1 +- Annotation editing via react-konva (move, resize, draw, delete) — v1.1 +- Error triage: sample tagging, per-annotation TP/FP/FN via IoU, worst-images ranking, highlight mode — v1.1 +- Interactive discovery: confusion matrix, near-duplicates, histogram filtering, find-similar — v1.1 +- Keyboard shortcuts: 16 shortcuts across grid, modal, triage, editing — v1.1 ### Out of Scope -- Multi-user collaboration — personal tool, single-user auth only for VM security -- Video annotation support — image-only for now -- Training pipeline integration — DataVisor inspects data, doesn't train models +- Multi-user collaboration — personal tool, 
single-user auth only +- Video annotation support — image-only +- Training pipeline integration — DataVisor inspects data, doesn't train models - Mobile/tablet interface — desktop browser only -- Real-time streaming inference — batch-oriented analysis -- Full annotation editor (draw new boxes, complex labeling workflows) — quick corrections only, not CVAT replacement - -## Current Milestone: v1.1 Deployment, Workflow & Competitive Parity - -**Goal:** Make DataVisor deployable (Docker + GCP), secure for cloud access, and close key workflow gaps vs FiftyOne/Encord — smart ingestion, error triage, annotation corrections, and keyboard-driven navigation. - -**Target features:** -- Dockerized project with single-user auth (basic auth for cloud VM security) -- GCP deployment script + local run script -- Smart dataset ingestion UI (auto-detect folder structure, train/val/test splits) -- Annotation management (organize + quick edit: move/resize/delete bboxes) -- Error triage & data curation workflow (tag, highlight, rank worst images) -- Keyboard shortcuts for navigation -- Competitive gaps from FiftyOne/Encord analysis - -## Context - -Shipped v1.0 with 12,720 LOC (6,950 Python + 5,770 TypeScript) across 7 phases and 21 plans. -Tech stack: FastAPI + DuckDB + Qdrant (backend), Next.js + Tailwind + deck.gl + Recharts (frontend), Pydantic AI (agents), Moondream2 (VLM). -59 backend tests passing. TypeScript compiles with 0 errors. -Architecture: 3 Zustand stores, FastAPI DI, source discriminator for GT/prediction separation, 4 SSE progress streams, lazy model loading. 
+- Full annotation editor (polygons, segmentation) — bounding box only ## Constraints - **Tech Stack**: FastAPI + DuckDB + Qdrant (backend), Next.js + Tailwind + deck.gl (frontend), Pydantic AI (agents) — established - **Performance**: Must handle 100K+ images without UI lag; DuckDB for metadata queries, deck.gl for WebGL rendering, virtualized scrolling - **Storage**: Supports both local filesystem and GCS bucket sources -- **GPU**: VLM inference (Moondream2) supports MPS/CUDA/CPU auto-detection; DINOv2 embeddings likewise +- **GPU**: VLM inference (Moondream2) supports MPS/CUDA/CPU auto-detection; SigLIP embeddings likewise - **Extensibility**: BasePlugin architecture exists; hooks system ready for expansion - **Python**: 3.14+ (numba/umap-learn incompatible; using scikit-learn t-SNE) @@ -89,16 +54,18 @@ Architecture: 3 Zustand stores, FastAPI DI, source discriminator for GT/predicti | Decision | Rationale | Outcome | |----------|-----------|---------| -| DuckDB over SQLite | Analytical queries on metadata at scale; columnar storage for filtering 100K+ rows | ✓ Good | -| Qdrant over FAISS | Payload filtering support; Rust-based performance; local deployment | ✓ Good | -| deck.gl for embedding viz | WebGL-powered; handles millions of points; lasso/interaction built-in | ✓ Good | -| Pydantic AI for agents | Type-safe agent definitions; native FastAPI/Pydantic integration | ✓ Good | -| Deterministic color hashing | Class names hash to consistent colors across sessions; no manual palette | ✓ Good | -| Plugin hooks over monolith | Ingestion/UI/transformation hooks enable domain-specific extensions without forking | ✓ Good | -| Source discriminator column | Clean GT/prediction separation in annotations table via source field | ✓ Good | -| Lazy model loading | VLM and Qdrant loaded on-demand, not at startup, to avoid memory pressure | ✓ Good | -| t-SNE over UMAP | umap-learn blocked by Python 3.14 numba incompatibility; t-SNE via scikit-learn | ⚠️ Revisit when numba 
supports 3.14 | -| Moondream2 via transformers | trust_remote_code with all_tied_weights_keys patch for transformers 5.x compat | ✓ Good (fragile — monitor updates) | +| DuckDB over SQLite | Analytical queries on metadata at scale; columnar storage for filtering 100K+ rows | Good | +| Qdrant over FAISS | Payload filtering support; Rust-based performance; local deployment | Good | +| deck.gl for embedding viz | WebGL-powered; handles millions of points; lasso/interaction built-in | Good | +| Pydantic AI for agents | Type-safe agent definitions; native FastAPI/Pydantic integration | Good | +| Deterministic color hashing | Class names hash to consistent colors across sessions; no manual palette | Good | +| Source discriminator column | Clean GT/prediction separation in annotations table via source field | Good | +| Caddy over nginx | Auto-HTTPS, built-in basic_auth, simpler config | Good | +| react-konva for editing | Canvas-based editing in modal; SVG stays for grid overlays | Good | +| Gemini 2.0 Flash for agent | Fast, cheap, good structured output; replaced GPT-4o | Good | +| Pre-computed agent prompt | All data in prompt, no tool calls; avoids Pydantic AI request_limit issues | Good | +| t-SNE over UMAP | umap-learn blocked by Python 3.14 numba incompatibility | Revisit when numba supports 3.14 | +| Moondream2 via transformers | trust_remote_code with all_tied_weights_keys patch for transformers 5.x | Fragile — monitor updates | --- -*Last updated: 2026-02-12 after v1.1 scope redefinition* +*Last updated: 2026-02-13 after v1.1 milestone completion* diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md index 6073dd0..20563c9 100644 --- a/.planning/ROADMAP.md +++ b/.planning/ROADMAP.md @@ -2,8 +2,8 @@ ## Milestones -- v1.0 MVP - Phases 1-7 (shipped 2026-02-12) -- **v1.1 Deployment, Workflow & Competitive Parity** - Phases 8-14 +- v1.0 MVP - Phases 1-7 (shipped 2026-02-12) — [archive](milestones/v1.0-ROADMAP.md) +- v1.1 Deployment, Workflow & 
Competitive Parity - Phases 8-14 (shipped 2026-02-13) — [archive](milestones/v1.1-ROADMAP.md) ## Phases @@ -40,143 +40,40 @@ -### v1.1 Deployment, Workflow & Competitive Parity - -**Milestone Goal:** Make DataVisor deployable (Docker + GCP), secure for cloud access, and close key workflow gaps vs FiftyOne/Encord -- smart ingestion, annotation editing, error triage, interactive visualizations, and keyboard-driven navigation. - -**Phase Numbering:** -- Integer phases (8, 9, 10, ...): Planned milestone work -- Decimal phases (9.1, 9.2): Urgent insertions (marked with INSERTED) - -Decimal phases appear between their surrounding integers in numeric order. - -- [x] **Phase 8: Docker Deployment & Auth** - Dockerized 3-service stack with Caddy reverse proxy, basic auth, and deployment scripts -- [x] **Phase 9: Smart Ingestion** - No-code dataset import from folder path with auto-detection and confirmation -- [x] **Phase 10: Annotation Editing** - Move, resize, delete, and draw bounding boxes via react-konva in sample detail modal -- [x] **Phase 11: Error Triage** - Tag errors, highlight mode, and worst-images ranking with DuckDB persistence -- [x] **Phase 12: Interactive Viz & Discovery** - Confusion matrix, near-duplicates, interactive histograms, and find-similar -- [x] **Phase 13: Keyboard Shortcuts** - Keyboard navigation, triage hotkeys, edit shortcuts, and help overlay -- [x] **Phase 14: Per-Annotation Triage** - Auto-discover TP/FP/FN per bounding box via IoU overlap, color-coded boxes in detail modal, click to override classifications - -## Phase Details +
+v1.1 Deployment, Workflow & Competitive Parity (Phases 8-14) - SHIPPED 2026-02-13 ### Phase 8: Docker Deployment & Auth -**Goal**: DataVisor runs as a deployable Docker stack with single-user auth, accessible securely on a cloud VM or locally with a single command -**Depends on**: Phase 7 (v1.0 complete) -**Requirements**: DEPLOY-01, DEPLOY-02, DEPLOY-03, DEPLOY-04, DEPLOY-05 -**Success Criteria** (what must be TRUE): - 1. User can run `docker compose up` and access DataVisor at `http://localhost` with all features working (grid, embeddings, error analysis) - 2. User is prompted for username/password before accessing any page or API endpoint, and unauthenticated requests are rejected - 3. User can run a deployment script that provisions a GCP VM with persistent disk and starts DataVisor accessible at a public IP with HTTPS - 4. User can follow deployment documentation to configure environment variables, deploy to GCP, and set up a custom domain - 5. DuckDB data, Qdrant vectors, and thumbnail cache persist across container restarts without data loss -**Plans**: 5 plans - -Plans: -- [x] 08-01-PLAN.md -- Backend Dockerfile + config fixes (CORS, DuckDB CHECKPOINT) -- [x] 08-02-PLAN.md -- Frontend Dockerfile + Caddyfile reverse proxy with auth -- [x] 08-03-PLAN.md -- Docker Compose orchestration + .dockerignore + env config -- [x] 08-04-PLAN.md -- Local run script + GCP deployment scripts -- [x] 08-05-PLAN.md -- Deployment documentation + full stack verification +**Goal**: Deployable Docker stack with single-user auth, accessible on cloud VM or locally +**Plans**: 5 plans (complete) ### Phase 9: Smart Ingestion -**Goal**: Users can import datasets from the UI by pointing at a folder, reviewing auto-detected structure, and confirming import -- no CLI or config files needed -**Depends on**: Phase 8 (auth protects new endpoints) -**Requirements**: INGEST-01, INGEST-02, INGEST-03, INGEST-04, INGEST-05 -**Success Criteria** (what must be TRUE): - 1. 
User can enter a folder path in the UI and trigger a scan that returns detected dataset structure - 2. Scanner correctly identifies COCO annotation files and image directories within the folder - 3. Scanner detects train/val/test split subdirectories and presents them as separate importable splits - 4. User sees the detected structure as a confirmation step and can approve or adjust before import begins - 5. Import progress displays per-split status via real-time SSE updates until completion -**Plans**: 2 plans - -Plans: -- [x] 09-01-PLAN.md -- Backend FolderScanner service, scan/import API endpoints, split-aware ingestion pipeline -- [x] 09-02-PLAN.md -- Frontend ingestion wizard (path input, scan results, import progress) + landing page link +**Goal**: No-code dataset import from folder path with auto-detection and confirmation +**Plans**: 2 plans (complete) ### Phase 10: Annotation Editing -**Goal**: Users can make quick bounding box corrections directly in the sample detail modal without leaving DataVisor -**Depends on**: Phase 8 (auth protects mutation endpoints) -**Requirements**: ANNOT-01, ANNOT-02, ANNOT-03, ANNOT-04, ANNOT-05 -**Success Criteria** (what must be TRUE): - 1. User can enter edit mode in the sample detail modal and drag a bounding box to a new position - 2. User can grab resize handles on a bounding box and change its dimensions - 3. User can delete a bounding box and the deletion persists after closing the modal - 4. User can draw a new bounding box and assign it a class label - 5. 
Only ground truth annotations show edit controls; prediction annotations remain read-only and non-interactive -**Plans**: 3 plans - -Plans: -- [x] 10-01-PLAN.md -- Backend annotation CRUD endpoints + frontend mutation hooks and types -- [x] 10-02-PLAN.md -- Konva building blocks: coord-utils, EditableRect, DrawLayer, ClassPicker -- [x] 10-03-PLAN.md -- AnnotationEditor composition, sample modal integration, annotation list delete +**Goal**: Move, resize, delete, and draw bounding boxes via react-konva in sample detail modal +**Plans**: 3 plans (complete) ### Phase 11: Error Triage -**Goal**: Users can systematically review and tag errors with a focused triage workflow that persists decisions and surfaces the worst samples first -**Depends on**: Phase 8 (extends v1.0 error analysis) -**Requirements**: TRIAGE-01, TRIAGE-02, TRIAGE-03 -**Success Criteria** (what must be TRUE): - 1. User can tag any sample or annotation as FP, TP, FN, or mistake, and the tag persists across page refreshes - 2. User can activate highlight mode to dim non-error samples in the grid, making errors visually prominent - 3. 
User can view a "worst images" ranking that surfaces samples with the highest combined error score (error count + confidence spread + uniqueness) -**Plans**: 2 plans - -Plans: -- [x] 11-01-PLAN.md -- Backend triage endpoints (set-triage-tag, worst-images scoring) + frontend hooks and types -- [x] 11-02-PLAN.md -- Triage tag buttons in detail modal, highlight mode grid dimming, worst-images stats panel +**Goal**: Tag errors, highlight mode, and worst-images ranking with DuckDB persistence +**Plans**: 2 plans (complete) ### Phase 12: Interactive Viz & Discovery -**Goal**: Users can explore dataset quality interactively -- clicking visualization elements filters the grid, finding similar samples and near-duplicates is one click away -**Depends on**: Phase 11 (triage data informs confusion matrix), Phase 8 (auth protects endpoints) -**Requirements**: ANNOT-06, TRIAGE-04, TRIAGE-05, TRIAGE-06 -**Success Criteria** (what must be TRUE): - 1. User can click "Find Similar" on any sample to see nearest neighbors from Qdrant displayed in the grid - 2. User can view a confusion matrix and click any cell to filter the grid to samples matching that GT/prediction pair - 3. User can trigger near-duplicate detection and browse groups of visually similar images - 4. 
User can click a bar in any statistics dashboard histogram to filter the grid to samples in that bucket -**Plans**: 3 plans - -Plans: -- [x] 12-01-PLAN.md -- Discovery filter foundation + Find Similar grid filtering + interactive histogram bars -- [x] 12-02-PLAN.md -- Clickable confusion matrix cells with backend sample ID resolution -- [x] 12-03-PLAN.md -- Near-duplicate detection via Qdrant pairwise search with SSE progress +**Goal**: Confusion matrix, near-duplicates, interactive histograms, and find-similar +**Plans**: 3 plans (complete) ### Phase 13: Keyboard Shortcuts -**Goal**: Power users can navigate, triage, and edit entirely from the keyboard without reaching for the mouse -**Depends on**: Phase 10 (annotation edit shortcuts), Phase 11 (triage shortcuts), Phase 12 (all UI features exist) -**Requirements**: UX-01, UX-02, UX-03, UX-04 -**Success Criteria** (what must be TRUE): - 1. User can navigate between samples in the grid and modal using arrow keys, j/k, Enter, and Escape - 2. User can quick-tag errors during triage using number keys and toggle highlight mode with h - 3. User can delete annotations and undo edits with keyboard shortcuts while in annotation edit mode - 4. User can press ? 
to open a shortcut help overlay listing all available keyboard shortcuts -**Plans**: 2 plans - -Plans: -- [x] 13-01-PLAN.md -- Foundation (react-hotkeys-hook, shortcut registry, ui-store) + grid keyboard navigation -- [x] 13-02-PLAN.md -- Modal shortcuts (navigation, triage, editing, undo) + help overlay +**Goal**: Keyboard navigation, triage hotkeys, edit shortcuts, and help overlay +**Plans**: 2 plans (complete) ### Phase 14: Per-Annotation Triage -**Goal**: Users can see auto-discovered TP/FP/FN classifications per bounding box based on IoU overlap, with color-coded visualization in the detail modal and the ability to click individual annotations to override their classification -**Depends on**: Phase 11 (extends triage system), Phase 6 (error analysis IoU matching) -**Success Criteria** (what must be TRUE): - 1. User opens a sample with GT and predictions and sees each bounding box color-coded as TP (green), FP (red), or FN (orange) based on automatic IoU matching - 2. User can click an individual bounding box to override its auto-assigned classification (e.g. mark an auto-TP as a mistake) - 3. Per-annotation triage decisions persist across page refreshes and are stored in DuckDB - 4. Highlight mode dims samples that have no triage annotations, making triaged samples visually prominent -**Plans**: 3 plans - -Plans: -- [x] 14-01-PLAN.md -- Backend schema, IoU matching service, and annotation triage API endpoints -- [x] 14-02-PLAN.md -- Frontend types, hooks, and clickable TriageOverlay SVG component -- [x] 14-03-PLAN.md -- Wire TriageOverlay into sample modal + highlight mode integration +**Goal**: Auto-discover TP/FP/FN per bounding box via IoU overlap, color-coded boxes in detail modal, click to override classifications +**Plans**: 3 plans (complete) -## Progress +
-**Execution Order:** -Phases execute in numeric order: 8 -> 9 -> 10 -> 11 -> 12 -> 13 -> 14 -(Note: Phases 9, 10, 11 are independent after Phase 8. Execution is sequential but no inter-dependency exists between 9/10/11.) +## Progress | Phase | Milestone | Plans Complete | Status | Completed | |-------|-----------|----------------|--------|-----------| diff --git a/.planning/STATE.md b/.planning/STATE.md index 1526a0a..61f341d 100644 --- a/.planning/STATE.md +++ b/.planning/STATE.md @@ -2,17 +2,17 @@ ## Project Reference -See: .planning/PROJECT.md (updated 2026-02-12) +See: .planning/PROJECT.md (updated 2026-02-13) **Core value:** A single tool that replaces scattered scripts: load any CV dataset, visually browse with annotation overlays, compare GT vs predictions, cluster via embeddings, and surface mistakes -- all in one workflow. -**Current focus:** v1.1 complete. All 14 phases delivered. +**Current focus:** v1.1 shipped. No active milestone. ## Current Position -Phase: 14 of 14 (Per-Annotation Triage) -Plan: 3 of 3 in current phase -Status: Complete -Last activity: 2026-02-13 -- Phase 14 verified and complete +Phase: -- +Plan: -- +Status: Between milestones (v1.1 shipped, v1.2 not started) +Last activity: 2026-02-13 -- v1.1 milestone archived Progress: [████████████████████████████████████████████████████████████] v1.1: 41/41 plans complete @@ -23,96 +23,16 @@ Progress: [███████████████████████ - Average duration: 3.9 min - Total execution time: 82 min -**By Phase (v1.0):** - -| Phase | Plans | Total | Avg/Plan | -|-------|-------|-------|----------| -| 1. Data Foundation | 4/4 | 14 min | 3.5 min | -| 2. Visual Grid | 3/3 | 15 min | 5.0 min | -| 3. Filtering & Search | 2/2 | 10 min | 5.0 min | -| 4. Predictions & Comparison | 3/3 | 9 min | 3.0 min | -| 5. Embeddings & Visualization | 4/4 | 16 min | 4.0 min | -| 6. Error Analysis & Similarity | 2/2 | 9 min | 4.5 min | -| 7. 
Intelligence & Agents | 3/3 | 9 min | 3.0 min | - -**By Phase (v1.1):** - -| Phase | Plans | Total | Avg/Plan | -|-------|-------|-------|----------| -| 8. Docker Deployment & Auth | 5/5 | 25 min | 5.0 min | -| 9. Smart Ingestion | 2/2 | 10 min | 5.0 min | -| 10. Annotation Editing | 3/3 | 9 min | 3.0 min | -| 11. Error Triage | 2/2 | 6 min | 3.0 min | -| 12. Interactive Viz & Discovery | 3/3 | 10 min | 3.3 min | -| 13. Keyboard Shortcuts | 2/2 | 6 min | 3.0 min | -| 14. Per-Annotation Triage | 3/3 | 7 min | 2.3 min | +**Velocity (v1.1):** +- Total plans completed: 20 +- Average duration: 3.7 min +- Total execution time: 73 min ## Accumulated Context ### Decisions Decisions are logged in PROJECT.md Key Decisions table. -Recent decisions affecting current work: - -- [v1.1 Roadmap]: Keep Qdrant in local mode for Docker (single-user <1M vectors) -- [v1.1 Roadmap]: Caddy over nginx for reverse proxy (auto-HTTPS, built-in basic_auth) -- [v1.1 Roadmap]: react-konva for annotation editing in detail modal only (SVG stays for grid) -- [v1.1 Roadmap]: FastAPI HTTPBasic DI over middleware (testable, composable) -- [08-01]: CPU-only PyTorch via post-sync replacement in Dockerfile (uv sync then uv pip install from CPU index) -- [08-01]: CORS restricted to localhost:3000 in dev, disabled entirely behind proxy (DATAVISOR_BEHIND_PROXY=true) -- [08-02]: NEXT_PUBLIC_API_URL=/api baked at build time for same-origin API via Caddy -- [08-02]: Caddy handles all auth at proxy layer -- zero application code changes -- [08-03]: Directory bind mount ./data:/app/data for DuckDB WAL + Qdrant + thumbnails persistence -- [08-03]: AUTH_PASSWORD_HASH has no default -- forces explicit auth configuration before deployment -- [08-03]: Only Caddy exposes ports 80/443 -- backend and frontend are Docker-internal only -- [08-04]: VM startup script does NOT auto-start docker compose -- requires manual .env setup first -- [08-04]: GCP config via env vars with defaults (only GCP_PROJECT_ID required) -- 
[08-05]: 10-section deployment docs covering local Docker, GCP, custom domain HTTPS, data persistence, troubleshooting -- [08-05]: opencv-python-headless replaces opencv-python in Docker builder stage (no X11/GUI libs in slim images) -- [09-01]: Three-layout priority detection: Roboflow > Standard COCO > Flat -- [09-01]: ijson peek at top-level keys for COCO detection (max 10 keys, files >500MB skipped) -- [09-01]: Optional dataset_id param on ingest_with_progress for multi-split ID sharing -- [09-01]: INSERT-or-UPDATE pattern for dataset record across multi-split imports -- [09-02]: POST SSE streaming via fetch + ReadableStream (not EventSource, which is GET-only) -- [09-02]: FolderScanner refactored to accept StorageBackend for GCS support -- [09-02]: Split-prefixed IDs for collision avoidance in multi-split import -- [10-01]: get_cursor DI for annotation router (auto-close cursor) -- [10-01]: source='ground_truth' enforced in SQL WHERE clauses for PUT/DELETE safety -- [10-01]: Dataset counts refreshed via subquery UPDATE (no race conditions) -- [10-02]: useDrawLayer hook pattern (handlers + ReactNode) instead of separate component -- [10-02]: Transformer scale reset to 1 on transformEnd (Konva best practice) -- [10-03]: AnnotationEditor loaded via next/dynamic with ssr:false (prevents Konva SSR errors) -- [10-03]: Draw completion shows ClassPicker before creating annotation (requires category selection) -- [10-03]: Delete buttons only appear on ground_truth rows when edit mode is active -- [11-01]: Dual router pattern (samples_router + datasets_router) from single triage module -- [11-01]: Atomic triage tag replacement via list_filter + list_append single SQL -- [11-01]: get_db DI pattern for triage router (matching statistics.py style) -- [11-02]: Triage buttons always visible in detail modal (not gated by edit mode) -- [11-02]: Highlight toggle uses yellow-500 active styling to distinguish from edit buttons -- [11-02]: Triage tag badges show short label 
(TP/FP/FN/MISTAKE) instead of full prefix -- [12-01]: Lasso selection takes priority over discovery filter (effectiveIds = lassoSelectedIds ?? sampleIdFilter) -- [12-01]: "Show in Grid" button only appears after similarity results load (progressive disclosure) -- [12-01]: getState() pattern for store access in Recharts onClick handlers (non-reactive) -- [12-01]: DiscoveryFilterChip in dataset header for cross-tab visibility -- [12-02]: Imperative fetch function (not hook) for one-shot confusion cell sample lookups -- [12-02]: Greedy IoU matching replayed per sample for consistent CM cell membership -- [12-02]: getState() pattern for Zustand store writes in async callbacks -- [12-03]: Tab bar always visible so Near Duplicates is accessible without predictions -- [12-03]: Union-find with path compression for O(alpha(n)) grouping of pairwise matches -- [12-03]: Progress updates throttled to every 10 points to avoid excessive state updates -- [13-01]: isFocused passed as prop from ImageGrid (avoids N store subscriptions per GridCell) -- [13-01]: Central shortcut registry pattern: all shortcuts as data in lib/shortcuts.ts -- [13-02]: Single useHotkeys('1, 2, 3, 4') with event.key dispatch (avoids rules-of-hooks violation) -- [13-02]: Single-level undo stack via React state for annotation delete undo -- [13-02]: Triage number keys disabled during edit mode (prevents Konva focus confusion) -- [13-02]: groupByCategory via reduce instead of Object.groupBy (avoids es2024 lib dep) -- [14-01]: Reuse _compute_iou_matrix from evaluation.py (no duplicate IoU code) -- [14-01]: Auto-computed labels ephemeral (computed on GET, not stored); overrides persist in annotation_triage table -- [14-01]: triage:annotated sample tag bridges per-annotation triage to highlight mode -- [14-02]: TriageOverlay is separate from AnnotationOverlay (interactive vs non-interactive SVG) -- [14-02]: Click handler delegates to parent via callback (overlay does not manage mutations) -- [14-02]: Annotations 
not in triageMap skipped (handles GT-only samples gracefully) -- [14-03]: GT boxes show category name only, predictions show category + confidence% (color conveys triage type) ### Pending Todos @@ -120,10 +40,15 @@ None. ### Blockers/Concerns -- [RESOLVED] SVG-to-Canvas coordinate mismatch resolved by coord-utils.ts (10-02) +None active. + +### Roadmap Evolution + +- v1.0: 7 phases (1-7), 21 plans — shipped 2026-02-12 +- v1.1: 7 phases (8-14), 20 plans — shipped 2026-02-13 ## Session Continuity Last session: 2026-02-13 -Stopped at: Phase 14 complete, v1.1 milestone complete +Stopped at: v1.1 milestone archived Resume file: None diff --git a/.planning/REQUIREMENTS.md b/.planning/milestones/v1.1-REQUIREMENTS.md similarity index 68% rename from .planning/REQUIREMENTS.md rename to .planning/milestones/v1.1-REQUIREMENTS.md index a8c8887..12b090b 100644 --- a/.planning/REQUIREMENTS.md +++ b/.planning/milestones/v1.1-REQUIREMENTS.md @@ -1,11 +1,10 @@ -# Requirements: DataVisor v1.1 +# Requirements Archive: DataVisor v1.1 **Defined:** 2026-02-12 -**Core Value:** A single tool that replaces scattered scripts: load any CV dataset, visually browse with annotation overlays, compare GT vs predictions, cluster via embeddings, and surface mistakes — all in one workflow. +**Completed:** 2026-02-13 +**Core Value:** A single tool that replaces scattered scripts: load any CV dataset, visually browse with annotation overlays, compare GT vs predictions, cluster via embeddings, and surface mistakes -- all in one workflow. -## v1.1 Requirements - -Requirements for Deployment, Workflow & Competitive Parity milestone. +## v1.1 Requirements (All Complete) ### Deployment & Infrastructure @@ -39,7 +38,7 @@ Requirements for Deployment, Workflow & Competitive Parity milestone. 
- [x] **TRIAGE-03**: "Worst images" ranking surfaces samples with highest combined error score (error count + confidence spread + uniqueness) - [x] **TRIAGE-04**: Interactive confusion matrix that filters grid when a cell is clicked - [x] **TRIAGE-05**: Near-duplicate detection surfaces visually similar images in the dataset -- [x] **TRIAGE-06**: Interactive histograms on the statistics dashboard — clicking a bar filters the grid +- [x] **TRIAGE-06**: Interactive histograms on the statistics dashboard -- clicking a bar filters the grid ### UX @@ -48,46 +47,8 @@ Requirements for Deployment, Workflow & Competitive Parity milestone. - [x] **UX-03**: Keyboard shortcuts for annotation editing (Delete, Ctrl+Z, e for edit mode) - [x] **UX-04**: Shortcut help overlay triggered by ? key -## v1.2 Requirements - -Deferred to future milestone. Tracked but not in current roadmap. - -### Format Expansion - -- **FMT-01**: YOLO format parser (.txt annotation files with class_id + normalized xywh) -- **FMT-02**: Pascal VOC format parser (XML annotation files) -- **FMT-03**: Dataset export in COCO and YOLO formats - -### Evaluation - -- **EVAL-01**: PR curves per class -- **EVAL-02**: Per-class AP metrics dashboard - -### Advanced - -- **ADV-01**: Model zoo / in-app inference (ONNX/TorchScript) -- **ADV-02**: Custom workspaces / panel layouts -- **ADV-03**: Customizable keyboard shortcut remapping -- **ADV-04**: CVAT/Label Studio integration for complex annotation workflows - -## Out of Scope - -Explicitly excluded. Documented to prevent scope creep. 
- -| Feature | Reason | -|---------|--------| -| Multi-user collaboration / RBAC | Personal tool — single-user auth for VM security only | -| Video annotation support | Image-only for now; multiplies complexity | -| Training pipeline integration | DataVisor inspects data, doesn't train models | -| Mobile/tablet interface | Desktop browser only | -| Real-time streaming inference | Batch-oriented analysis | -| 3D point cloud visualization | Different rendering pipeline entirely | -| Full annotation editor (polygon, segmentation) | Bounding box CRUD only for v1.1 | - ## Traceability -Which phases cover which requirements. Updated during roadmap creation. - | Requirement | Phase | Status | |-------------|-------|--------| | DEPLOY-01 | Phase 8 | Complete | @@ -117,11 +78,7 @@ Which phases cover which requirements. Updated during roadmap creation. | UX-03 | Phase 13 | Complete | | UX-04 | Phase 13 | Complete | -**Coverage:** -- v1.1 requirements: 26 total -- Mapped to phases: 26 -- Unmapped: 0 +**Coverage:** 26/26 requirements complete (100%) --- -*Requirements defined: 2026-02-12* -*Last updated: 2026-02-13 — Phase 13 requirements marked Complete (v1.1 milestone complete)* +*Archived: 2026-02-13* diff --git a/.planning/milestones/v1.1-ROADMAP.md b/.planning/milestones/v1.1-ROADMAP.md new file mode 100644 index 0000000..ad66394 --- /dev/null +++ b/.planning/milestones/v1.1-ROADMAP.md @@ -0,0 +1,131 @@ +# Milestone v1.1: Deployment, Workflow & Competitive Parity + +**Status:** SHIPPED 2026-02-13 +**Phases:** 8-14 +**Total Plans:** 20 + +## Overview + +Make DataVisor deployable (Docker + GCP), secure for cloud access, and close key workflow gaps vs FiftyOne/Encord -- smart ingestion, annotation editing, error triage, interactive visualizations, and keyboard-driven navigation. 
+ +## Phases + +### Phase 8: Docker Deployment & Auth + +**Goal**: DataVisor runs as a deployable Docker stack with single-user auth, accessible securely on a cloud VM or locally with a single command +**Depends on**: Phase 7 (v1.0 complete) +**Requirements**: DEPLOY-01, DEPLOY-02, DEPLOY-03, DEPLOY-04, DEPLOY-05 +**Plans**: 5 plans + +Plans: +- [x] 08-01: Backend Dockerfile + config fixes (CORS, DuckDB CHECKPOINT) +- [x] 08-02: Frontend Dockerfile + Caddyfile reverse proxy with auth +- [x] 08-03: Docker Compose orchestration + .dockerignore + env config +- [x] 08-04: Local run script + GCP deployment scripts +- [x] 08-05: Deployment documentation + full stack verification + +### Phase 9: Smart Ingestion + +**Goal**: Users can import datasets from the UI by pointing at a folder, reviewing auto-detected structure, and confirming import -- no CLI or config files needed +**Depends on**: Phase 8 (auth protects new endpoints) +**Requirements**: INGEST-01, INGEST-02, INGEST-03, INGEST-04, INGEST-05 +**Plans**: 2 plans + +Plans: +- [x] 09-01: Backend FolderScanner service, scan/import API endpoints, split-aware ingestion pipeline +- [x] 09-02: Frontend ingestion wizard (path input, scan results, import progress) + landing page link + +### Phase 10: Annotation Editing + +**Goal**: Users can make quick bounding box corrections directly in the sample detail modal without leaving DataVisor +**Depends on**: Phase 8 (auth protects mutation endpoints) +**Requirements**: ANNOT-01, ANNOT-02, ANNOT-03, ANNOT-04, ANNOT-05 +**Plans**: 3 plans + +Plans: +- [x] 10-01: Backend annotation CRUD endpoints + frontend mutation hooks and types +- [x] 10-02: Konva building blocks: coord-utils, EditableRect, DrawLayer, ClassPicker +- [x] 10-03: AnnotationEditor composition, sample modal integration, annotation list delete + +### Phase 11: Error Triage + +**Goal**: Users can systematically review and tag errors with a focused triage workflow that persists decisions and surfaces the worst samples 
first +**Depends on**: Phase 8 (extends v1.0 error analysis) +**Requirements**: TRIAGE-01, TRIAGE-02, TRIAGE-03 +**Plans**: 2 plans + +Plans: +- [x] 11-01: Backend triage endpoints (set-triage-tag, worst-images scoring) + frontend hooks and types +- [x] 11-02: Triage tag buttons in detail modal, highlight mode grid dimming, worst-images stats panel + +### Phase 12: Interactive Viz & Discovery + +**Goal**: Users can explore dataset quality interactively -- clicking visualization elements filters the grid, finding similar samples and near-duplicates is one click away +**Depends on**: Phase 11 (triage data informs confusion matrix), Phase 8 (auth protects endpoints) +**Requirements**: ANNOT-06, TRIAGE-04, TRIAGE-05, TRIAGE-06 +**Plans**: 3 plans + +Plans: +- [x] 12-01: Discovery filter foundation + Find Similar grid filtering + interactive histogram bars +- [x] 12-02: Clickable confusion matrix cells with backend sample ID resolution +- [x] 12-03: Near-duplicate detection via Qdrant pairwise search with SSE progress + +### Phase 13: Keyboard Shortcuts + +**Goal**: Power users can navigate, triage, and edit entirely from the keyboard without reaching for the mouse +**Depends on**: Phase 10 (annotation edit shortcuts), Phase 11 (triage shortcuts), Phase 12 (all UI features exist) +**Requirements**: UX-01, UX-02, UX-03, UX-04 +**Plans**: 2 plans + +Plans: +- [x] 13-01: Foundation (react-hotkeys-hook, shortcut registry, ui-store) + grid keyboard navigation +- [x] 13-02: Modal shortcuts (navigation, triage, editing, undo) + help overlay + +### Phase 14: Per-Annotation Triage + +**Goal**: Users can see auto-discovered TP/FP/FN classifications per bounding box based on IoU overlap, with color-coded visualization in the detail modal and the ability to click individual annotations to override their classification +**Depends on**: Phase 11 (extends triage system), Phase 6 (error analysis IoU matching) +**Plans**: 3 plans + +Plans: +- [x] 14-01: Backend schema, IoU matching 
service, and annotation triage API endpoints +- [x] 14-02: Frontend types, hooks, and clickable TriageOverlay SVG component +- [x] 14-03: Wire TriageOverlay into sample modal + highlight mode integration + +## Milestone Summary + +**Key Decisions:** + +- Caddy over nginx for reverse proxy (auto-HTTPS, built-in basic_auth) +- CPU-only PyTorch via post-sync replacement in Dockerfile +- react-konva for annotation editing (SVG stays for grid overlays) +- FastAPI HTTPBasic DI over middleware (testable, composable) +- Atomic triage tag replacement via list_filter + list_append in a single SQL statement +- Union-find with path compression for near-duplicate grouping +- Central shortcut registry pattern (all shortcuts as data) +- Auto-computed triage labels ephemeral (computed on GET); overrides persist in annotation_triage table +- Switched AI agent from OpenAI GPT-4o to Google Gemini 2.0 Flash +- Pre-compute all data for AI agent prompt (no tool calls needed) + +**Issues Resolved:** + +- opencv-python-headless for Docker slim images (no X11 libs needed) +- DuckDB WAL stale file recovery via CHECKPOINT on shutdown +- PyTorch CPU install order (uv sync first, then replace with CPU wheel) +- Pydantic AI request_limit exceeded by Gemini tool-call loop (eliminated tools) +- GEMINI_API_KEY not loading (load_dotenv for third-party libs) +- pyvips missing for Moondream2 auto-tag (added dependency) + +**Issues Deferred:** + +- UMAP blocked by Python 3.14 numba incompatibility (using t-SNE) +- Moondream2 trust_remote_code fragile with transformers updates + +**Technical Debt Incurred:** + +- Module-level cache for Intelligence panel results (should use React Query cache) +- Old triage tags filtered client-side (OBSOLETE_TRIAGE_TAGS set in grid-cell.tsx) + +--- + +_For current project status, see .planning/ROADMAP.md_ From 928cfb2f720d13bc6e106950fd714f734fba7924 Mon Sep 17 00:00:00 2001 From: "Enrique G. 
Ortiz" Date: Mon, 16 Feb 2026 16:41:09 -0500 Subject: [PATCH 02/38] feat: add class filter checkboxes to statistics overview tab Allow users to exclude specific classes from statistics computation to analyze data slices by label. Collapsible checkbox panel with select-all/deselect-all controls filters class distribution chart and recomputes summary stats client-side. Co-Authored-By: Claude Opus 4.6 --- .../src/components/stats/class-filter.tsx | 115 ++++++++++++++++++ .../src/components/stats/stats-dashboard.tsx | 78 ++++++++++-- 2 files changed, 185 insertions(+), 8 deletions(-) create mode 100644 frontend/src/components/stats/class-filter.tsx diff --git a/frontend/src/components/stats/class-filter.tsx b/frontend/src/components/stats/class-filter.tsx new file mode 100644 index 0000000..8a65a1d --- /dev/null +++ b/frontend/src/components/stats/class-filter.tsx @@ -0,0 +1,115 @@ +"use client"; + +/** + * Collapsible class filter with checkboxes for selecting which categories + * to include in statistics computation. + * + * Provides select-all / deselect-all controls and displays the active + * filter count as a badge when collapsed. + */ + +import { useState } from "react"; + +interface ClassFilterProps { + /** All available category names (from class distribution data). */ + categories: string[]; + /** Set of category names currently excluded from statistics. */ + excludedClasses: Set<string>; + /** Toggle a single category's inclusion. */ + onToggle: (category: string) => void; + /** Include all categories. */ + onSelectAll: () => void; + /** Exclude all categories. */ + onDeselectAll: () => void; +} + +export function ClassFilter({ + categories, + excludedClasses, + onToggle, + onSelectAll, + onDeselectAll, +}: ClassFilterProps) { + const [isExpanded, setIsExpanded] = useState(false); + const includedCount = categories.length - excludedClasses.size; + const isFiltered = excludedClasses.size > 0; + + return ( +
+ {/* Header - always visible */} + + + {/* Expandable body */} + {isExpanded && ( +
+ {/* Bulk actions */} +
+ + | + +
+ + {/* Checkbox list */} +
+ {categories.map((name) => ( + + ))} +
+
+ )} +
+ ); +} diff --git a/frontend/src/components/stats/stats-dashboard.tsx b/frontend/src/components/stats/stats-dashboard.tsx index b1f602d..bc72d3f 100644 --- a/frontend/src/components/stats/stats-dashboard.tsx +++ b/frontend/src/components/stats/stats-dashboard.tsx @@ -11,13 +11,14 @@ * - Intelligence: AI-powered error pattern analysis and recommendations */ -import { useState } from "react"; +import { useState, useMemo, useCallback } from "react"; import { useStatistics } from "@/hooks/use-statistics"; import { useFilterFacets } from "@/hooks/use-filter-facets"; import { useSplit, useFilterStore } from "@/stores/filter-store"; import { AnnotationSummary } from "@/components/stats/annotation-summary"; import { ClassDistribution } from "@/components/stats/class-distribution"; +import { ClassFilter } from "@/components/stats/class-filter"; import { SplitBreakdown } from "@/components/stats/split-breakdown"; import { EvaluationPanel } from "@/components/stats/evaluation-panel"; import { ErrorAnalysisPanel } from "@/components/stats/error-analysis-panel"; @@ -54,9 +55,59 @@ export function StatsDashboard({ datasetId }: StatsDashboardProps) { const { data: facets } = useFilterFacets(datasetId); const { data: stats, isLoading, error } = useStatistics(datasetId, split); const [activeTab, setActiveTab] = useState("overview"); + const [excludedClasses, setExcludedClasses] = useState<Set<string>>(new Set()); const availableSplits = facets?.splits.map((s) => s.name) ?? []; - const hasPredictions = stats && stats.summary.pred_annotations > 0; + + // All category names from the unfiltered class distribution + const allCategories = useMemo( + () => stats?.class_distribution.map((c) => c.category_name) ?? 
[], + [stats], + ); + + // Derive filtered class distribution and recomputed summary + const filteredStats = useMemo(() => { + if (!stats) return null; + if (excludedClasses.size === 0) return stats; + + const filteredDist = stats.class_distribution.filter( + (c) => !excludedClasses.has(c.category_name), + ); + const gtAnnotations = filteredDist.reduce((sum, c) => sum + c.gt_count, 0); + const predAnnotations = filteredDist.reduce((sum, c) => sum + c.pred_count, 0); + + return { + ...stats, + class_distribution: filteredDist, + summary: { + ...stats.summary, + gt_annotations: gtAnnotations, + pred_annotations: predAnnotations, + total_categories: filteredDist.length, + }, + }; + }, [stats, excludedClasses]); + + const hasPredictions = filteredStats && filteredStats.summary.pred_annotations > 0; + + const handleToggleClass = useCallback((category: string) => { + setExcludedClasses((prev) => { + const next = new Set(prev); + if (next.has(category)) { + next.delete(category); + } else { + next.add(category); + } + return next; + }); + }, []); + + const handleSelectAll = useCallback(() => setExcludedClasses(new Set()), []); + + const handleDeselectAll = useCallback( + () => setExcludedClasses(new Set(allCategories)), + [allCategories], + ); if (error) { return ( @@ -170,12 +221,23 @@ export function StatsDashboard({ datasetId }: StatsDashboardProps) { {activeTab === "overview" && ( <> + {/* Class Filter */} + {allCategories.length > 1 && ( + + )} + {/* Summary Stats */}

Summary

- {isLoading || !stats ? ( + {isLoading || !filteredStats ? (
@@ -183,7 +245,7 @@ export function StatsDashboard({ datasetId }: StatsDashboardProps) {
) : ( - + )}
@@ -192,11 +254,11 @@ export function StatsDashboard({ datasetId }: StatsDashboardProps) {

Class Distribution

- {isLoading || !stats ? ( + {isLoading || !filteredStats ? ( ) : ( <> - +

Click any bar to filter the grid by category

@@ -209,10 +271,10 @@ export function StatsDashboard({ datasetId }: StatsDashboardProps) {

Split Breakdown

- {isLoading || !stats ? ( + {isLoading || !filteredStats ? ( ) : ( - + )} From a468c629d40b8916a09472c2d8be19dbc4223096 Mon Sep 17 00:00:00 2001 From: "Enrique G. Ortiz" Date: Mon, 16 Feb 2026 16:49:09 -0500 Subject: [PATCH 03/38] feat: apply class filter to evaluation tab with cached recomputation Lift ClassFilter above sub-tab navigation so it's shared across all tabs. EvaluationPanel now receives excludedClasses and uses a new useFilteredEvaluation hook that: - Filters PR curves, per-class metrics, and confusion matrix rows/cols - Recomputes mAP as mean of filtered per-class AP values - Synthesizes a new "all" PR curve from included classes (COCO 101-pt) - Caches results in a Map keyed by serialized excluded set, so revisiting the same combination is O(1) Co-Authored-By: Claude Opus 4.6 --- .../src/components/stats/evaluation-panel.tsx | 7 +- .../src/components/stats/stats-dashboard.tsx | 24 +-- frontend/src/hooks/use-filtered-evaluation.ts | 172 ++++++++++++++++++ 3 files changed, 189 insertions(+), 14 deletions(-) create mode 100644 frontend/src/hooks/use-filtered-evaluation.ts diff --git a/frontend/src/components/stats/evaluation-panel.tsx b/frontend/src/components/stats/evaluation-panel.tsx index fa323e1..d38e90e 100644 --- a/frontend/src/components/stats/evaluation-panel.tsx +++ b/frontend/src/components/stats/evaluation-panel.tsx @@ -11,6 +11,7 @@ import { useState, useEffect, useMemo, useCallback } from "react"; import { useFilterFacets } from "@/hooks/use-filter-facets"; import { useEvaluation } from "@/hooks/use-evaluation"; +import { useFilteredEvaluation } from "@/hooks/use-filtered-evaluation"; import { fetchConfusionCellSamples } from "@/hooks/use-confusion-cell"; import { useFilterStore } from "@/stores/filter-store"; import { useUIStore } from "@/stores/ui-store"; @@ -22,6 +23,7 @@ import { PerClassTable } from "@/components/stats/per-class-table"; interface EvaluationPanelProps { datasetId: string; split: string | null; + excludedClasses: Set<string>; } 
function useDebouncedValue<T>(value: T, delay: number): T { @@ -50,7 +52,7 @@ function SkeletonChart({ height }: { height: string }) { ); } -export function EvaluationPanel({ datasetId, split }: EvaluationPanelProps) { +export function EvaluationPanel({ datasetId, split, excludedClasses }: EvaluationPanelProps) { const { data: facets } = useFilterFacets(datasetId); // Available prediction sources (exclude ground_truth) @@ -76,13 +78,14 @@ export function EvaluationPanel({ datasetId, split }: EvaluationPanelProps) { const debouncedIou = useDebouncedValue(iouThreshold, 300); const debouncedConf = useDebouncedValue(confThreshold, 300); - const { data, isLoading } = useEvaluation( + const { data: rawData, isLoading } = useEvaluation( datasetId, source, debouncedIou, debouncedConf, split, ); + const data = useFilteredEvaluation(rawData, excludedClasses); const handleCellClick = useCallback( async (actualClass: string, predictedClass: string) => { diff --git a/frontend/src/components/stats/stats-dashboard.tsx b/frontend/src/components/stats/stats-dashboard.tsx index bc72d3f..0f7ffb8 100644 --- a/frontend/src/components/stats/stats-dashboard.tsx +++ b/frontend/src/components/stats/stats-dashboard.tsx @@ -151,6 +151,17 @@ export function StatsDashboard({ datasetId }: StatsDashboardProps) { )} + {/* Class filter (shared across sub-tabs, like the split selector) */} + {allCategories.length > 1 && ( + + )} + {/* Sub-tab navigation (always visible -- Near Duplicates works without predictions) */}