From 362b7519837d639208f9d41fc655f0d3e57b36c6 Mon Sep 17 00:00:00 2001 From: Sylvain Cau Date: Sun, 3 May 2026 10:31:21 -0700 Subject: [PATCH 01/12] feat(metadata): add per-library scheduled refresh config storage Lay the groundwork for scheduled, scoped, per-library metadata refreshes by adding a `metadata_refresh_config` JSON column to the `libraries` table, a strongly typed `MetadataRefreshConfig` with safe defaults (disabled, "use existing source IDs only", default field groups ratings/status/counts), and library-repository accessors that always return a usable config (default on NULL or parse error, with the parse error logged). The schema reserves a `per_provider_overrides` slot for future per-provider field allowlists so the eventual override feature won't need a migration. A `MetadataRefreshConfigPatch` and `merge_partial` helper are included for the upcoming PATCH endpoint; they are gated with `#[allow(dead_code)]` until that endpoint lands. Tests cover serde round-trips, partial JSON, parse-helper edge cases, PATCH merge semantics including null clears, and end-to-end repository read/write/overwrite plus default-on-NULL and invalid-JSON fallback. 
--- migration/src/lib.rs | 4 + ...0503_000071_add_metadata_refresh_config.rs | 49 ++ src/db/entities/libraries.rs | 6 + src/db/repositories/library.rs | 228 ++++++++++ src/scanner/library_scanner.rs | 1 + src/services/metadata/mod.rs | 6 + src/services/metadata/refresh_config.rs | 418 ++++++++++++++++++ 7 files changed, 712 insertions(+) create mode 100644 migration/src/m20260503_000071_add_metadata_refresh_config.rs create mode 100644 src/services/metadata/refresh_config.rs diff --git a/migration/src/lib.rs b/migration/src/lib.rs index 36cf07dd..b3d752c8 100644 --- a/migration/src/lib.rs +++ b/migration/src/lib.rs @@ -143,6 +143,8 @@ mod m20260502_000068_drop_book_count; mod m20260503_000069_add_book_chapter; // Backfill volume/chapter from filename for already-scanned books (Phase 12) mod m20260503_000070_backfill_book_volume_chapter; +// Per-library scheduled metadata refresh config (Phase 1 of scheduled-metadata-refresh) +mod m20260503_000071_add_metadata_refresh_config; pub struct Migrator; @@ -258,6 +260,8 @@ impl MigratorTrait for Migrator { Box::new(m20260503_000069_add_book_chapter::Migration), // Backfill book_metadata.volume / .chapter from filename (Phase 12) Box::new(m20260503_000070_backfill_book_volume_chapter::Migration), + // Per-library scheduled metadata refresh config (Phase 1) + Box::new(m20260503_000071_add_metadata_refresh_config::Migration), ] } } diff --git a/migration/src/m20260503_000071_add_metadata_refresh_config.rs b/migration/src/m20260503_000071_add_metadata_refresh_config.rs new file mode 100644 index 00000000..330e7617 --- /dev/null +++ b/migration/src/m20260503_000071_add_metadata_refresh_config.rs @@ -0,0 +1,49 @@ +//! Add `metadata_refresh_config` column to `libraries` (Phase 1 of scheduled-metadata-refresh). +//! +//! Stores per-library JSON configuration for the scheduled metadata refresh +//! feature: cron schedule, field groups to refresh, providers, and safety +//! options. 
Mirrors the existing `scanning_config`/`title_preprocessing_rules` +//! pattern: a nullable TEXT column whose contents are parsed lazily. +//! +//! NULL means "feature off, defaults applied when read." No data backfill +//! needed; the feature is opt-in per library. + +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .alter_table( + Table::alter() + .table(Libraries::Table) + .add_column(ColumnDef::new(Libraries::MetadataRefreshConfig).text()) + .to_owned(), + ) + .await?; + + Ok(()) + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .alter_table( + Table::alter() + .table(Libraries::Table) + .drop_column(Libraries::MetadataRefreshConfig) + .to_owned(), + ) + .await?; + + Ok(()) + } +} + +#[derive(DeriveIden)] +enum Libraries { + Table, + MetadataRefreshConfig, +} diff --git a/src/db/entities/libraries.rs b/src/db/entities/libraries.rs index d5973961..3e143fd1 100644 --- a/src/db/entities/libraries.rs +++ b/src/db/entities/libraries.rs @@ -38,6 +38,12 @@ pub struct Model { /// Controls when auto-matching runs for this library #[sea_orm(column_type = "Text")] pub auto_match_conditions: Option, + /// Scheduled metadata-refresh configuration as JSON object + /// Controls per-library cron, field groups, providers, and safety toggles + /// for the scheduled metadata refresh feature. NULL = feature off (defaults + /// applied when read). 
+ #[sea_orm(column_type = "Text")] + pub metadata_refresh_config: Option, pub created_at: DateTime, pub updated_at: DateTime, pub last_scanned_at: Option>, diff --git a/src/db/repositories/library.rs b/src/db/repositories/library.rs index 6cadd94a..a55255e5 100644 --- a/src/db/repositories/library.rs +++ b/src/db/repositories/library.rs @@ -31,6 +31,7 @@ pub struct CreateLibraryParams { pub excluded_patterns: Option, pub title_preprocessing_rules: Option, pub auto_match_conditions: Option, + pub metadata_refresh_config: Option, } impl CreateLibraryParams { @@ -51,6 +52,7 @@ impl CreateLibraryParams { excluded_patterns: None, title_preprocessing_rules: None, auto_match_conditions: None, + metadata_refresh_config: None, } } @@ -98,6 +100,11 @@ impl CreateLibraryParams { self.auto_match_conditions = conditions; self } + + pub fn with_metadata_refresh_config(mut self, config: Option) -> Self { + self.metadata_refresh_config = config; + self + } } /// Repository for Library operations @@ -129,6 +136,7 @@ impl LibraryRepository { excluded_patterns: Set(params.excluded_patterns), title_preprocessing_rules: Set(params.title_preprocessing_rules), auto_match_conditions: Set(params.auto_match_conditions), + metadata_refresh_config: Set(params.metadata_refresh_config), created_at: Set(now), updated_at: Set(now), last_scanned_at: Set(None), @@ -217,6 +225,7 @@ impl LibraryRepository { excluded_patterns: Set(library.excluded_patterns.clone()), title_preprocessing_rules: Set(library.title_preprocessing_rules.clone()), auto_match_conditions: Set(library.auto_match_conditions.clone()), + metadata_refresh_config: Set(library.metadata_refresh_config.clone()), created_at: Set(library.created_at), updated_at: Set(Utc::now()), last_scanned_at: Set(library.last_scanned_at), @@ -326,6 +335,66 @@ impl LibraryRepository { } } } + + /// Get the scheduled metadata-refresh config for a library by ID. 
+ /// + /// Loads the row, parses `metadata_refresh_config`, and returns the + /// strongly-typed config. NULL or invalid JSON falls back to + /// [`crate::services::metadata::MetadataRefreshConfig::default`] (with a + /// warning logged for invalid JSON), so callers can rely on always + /// receiving a usable config rather than handling None or parse errors. + pub async fn get_metadata_refresh_config( + db: &DatabaseConnection, + library_id: Uuid, + ) -> Result { + use crate::services::metadata::{MetadataRefreshConfig, parse_metadata_refresh_config}; + + let library = Self::get_by_id(db, library_id) + .await? + .ok_or_else(|| anyhow::anyhow!("Library not found: {}", library_id))?; + + match parse_metadata_refresh_config(library.metadata_refresh_config.as_deref()) { + Ok(cfg) => Ok(cfg), + Err(e) => { + tracing::warn!( + "Failed to parse metadata refresh config for library {}: {} — using defaults", + library_id, + e + ); + Ok(MetadataRefreshConfig::default()) + } + } + } + + /// Persist the scheduled metadata-refresh config for a library. + /// + /// Serializes to JSON and writes the `metadata_refresh_config` column + /// without touching the rest of the row. Bumps `updated_at`. + pub async fn set_metadata_refresh_config( + db: &DatabaseConnection, + library_id: Uuid, + config: &crate::services::metadata::MetadataRefreshConfig, + ) -> Result<()> { + let library = Libraries::find_by_id(library_id) + .one(db) + .await + .context("Failed to load library for metadata-refresh config update")? 
+ .ok_or_else(|| anyhow::anyhow!("Library not found: {}", library_id))?; + + let json = + serde_json::to_string(config).context("Failed to serialize metadata refresh config")?; + + let mut active: libraries::ActiveModel = library.into(); + active.metadata_refresh_config = Set(Some(json)); + active.updated_at = Set(Utc::now()); + + active + .update(db) + .await + .context("Failed to update metadata refresh config")?; + + Ok(()) + } } #[cfg(test)] @@ -958,4 +1027,163 @@ mod tests { let conditions = LibraryRepository::get_auto_match_conditions(&library); assert!(conditions.is_none()); } + + // ========================================================================= + // metadata_refresh_config accessors + // ========================================================================= + + #[tokio::test] + async fn test_get_metadata_refresh_config_returns_default_when_null() { + use crate::services::metadata::MetadataRefreshConfig; + + let (db, _temp_dir) = create_test_db().await; + + let library = LibraryRepository::create( + db.sea_orm_connection(), + "Refresh Default Library", + "/refresh/default", + ScanningStrategy::Default, + ) + .await + .unwrap(); + + let cfg = + LibraryRepository::get_metadata_refresh_config(db.sea_orm_connection(), library.id) + .await + .unwrap(); + + assert_eq!(cfg, MetadataRefreshConfig::default()); + assert!(!cfg.enabled); + assert!(cfg.existing_source_ids_only); + } + + #[tokio::test] + async fn test_set_and_get_metadata_refresh_config_round_trip() { + use crate::services::metadata::MetadataRefreshConfig; + + let (db, _temp_dir) = create_test_db().await; + + let library = LibraryRepository::create( + db.sea_orm_connection(), + "Refresh Configured Library", + "/refresh/configured", + ScanningStrategy::Default, + ) + .await + .unwrap(); + + let cfg = MetadataRefreshConfig { + enabled: true, + cron_schedule: "0 0 6 * * *".to_string(), + timezone: Some("UTC".to_string()), + field_groups: vec!["ratings".to_string(), "status".to_string()], + 
extra_fields: vec!["language".to_string()], + providers: vec!["plugin:mangabaka".to_string()], + existing_source_ids_only: false, + skip_recently_synced_within_s: 1800, + max_concurrency: 8, + per_provider_overrides: None, + }; + + LibraryRepository::set_metadata_refresh_config(db.sea_orm_connection(), library.id, &cfg) + .await + .unwrap(); + + let loaded = + LibraryRepository::get_metadata_refresh_config(db.sea_orm_connection(), library.id) + .await + .unwrap(); + + assert_eq!(loaded, cfg); + } + + #[tokio::test] + async fn test_set_metadata_refresh_config_overwrites_previous_value() { + use crate::services::metadata::MetadataRefreshConfig; + + let (db, _temp_dir) = create_test_db().await; + + let library = LibraryRepository::create( + db.sea_orm_connection(), + "Overwrite Library", + "/refresh/overwrite", + ScanningStrategy::Default, + ) + .await + .unwrap(); + + let initial = MetadataRefreshConfig { + enabled: true, + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config( + db.sea_orm_connection(), + library.id, + &initial, + ) + .await + .unwrap(); + + let updated = MetadataRefreshConfig { + enabled: false, + cron_schedule: "0 0 8 * * *".to_string(), + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config( + db.sea_orm_connection(), + library.id, + &updated, + ) + .await + .unwrap(); + + let loaded = + LibraryRepository::get_metadata_refresh_config(db.sea_orm_connection(), library.id) + .await + .unwrap(); + + assert!(!loaded.enabled); + assert_eq!(loaded.cron_schedule, "0 0 8 * * *"); + } + + #[tokio::test] + async fn test_get_metadata_refresh_config_invalid_json_falls_back_to_default() { + use crate::services::metadata::MetadataRefreshConfig; + + let (db, _temp_dir) = create_test_db().await; + + let mut library = LibraryRepository::create( + db.sea_orm_connection(), + "Bad JSON Library", + "/refresh/bad-json", + ScanningStrategy::Default, + ) + .await + .unwrap(); + + // Stuff invalid JSON directly into the column to 
simulate corruption + library.metadata_refresh_config = Some("{not json".to_string()); + LibraryRepository::update(db.sea_orm_connection(), &library) + .await + .unwrap(); + + let cfg = + LibraryRepository::get_metadata_refresh_config(db.sea_orm_connection(), library.id) + .await + .unwrap(); + + // Falls back to defaults rather than failing the request + assert_eq!(cfg, MetadataRefreshConfig::default()); + } + + #[tokio::test] + async fn test_get_metadata_refresh_config_unknown_library_errors() { + let (db, _temp_dir) = create_test_db().await; + + let result = + LibraryRepository::get_metadata_refresh_config(db.sea_orm_connection(), Uuid::new_v4()) + .await; + + assert!(result.is_err()); + } } diff --git a/src/scanner/library_scanner.rs b/src/scanner/library_scanner.rs index 587cd79b..3df868ca 100644 --- a/src/scanner/library_scanner.rs +++ b/src/scanner/library_scanner.rs @@ -1655,6 +1655,7 @@ mod tests { excluded_patterns, title_preprocessing_rules: None, auto_match_conditions: None, + metadata_refresh_config: None, created_at: Utc::now(), updated_at: Utc::now(), last_scanned_at: None, diff --git a/src/services/metadata/mod.rs b/src/services/metadata/mod.rs index 8dbf3d03..25c70e12 100644 --- a/src/services/metadata/mod.rs +++ b/src/services/metadata/mod.rs @@ -11,7 +11,13 @@ mod apply; mod book_apply; mod cover; pub mod preprocessing; +pub mod refresh_config; pub use apply::{ApplyOptions, MetadataApplier, SkippedField}; pub use book_apply::{BookApplyOptions, BookMetadataApplier}; pub use cover::CoverService; +pub use refresh_config::{MetadataRefreshConfig, parse_metadata_refresh_config}; +// Re-exported for downstream phases (HTTP PATCH endpoint, per-provider +// overrides). Allowed to be unused until those phases land. 
+#[allow(unused_imports)] +pub use refresh_config::{MetadataRefreshConfigPatch, ProviderOverride}; diff --git a/src/services/metadata/refresh_config.rs b/src/services/metadata/refresh_config.rs new file mode 100644 index 00000000..22bd6ed9 --- /dev/null +++ b/src/services/metadata/refresh_config.rs @@ -0,0 +1,418 @@ +//! Configuration for the scheduled per-library metadata refresh. +//! +//! Stored as JSON in `libraries.metadata_refresh_config`. NULL in the database +//! means "feature off" — readers get [`MetadataRefreshConfig::default`] which +//! is the safe, opt-in default. +//! +//! # Shape (JSON) +//! +//! ```json +//! { +//! "enabled": false, +//! "cron_schedule": "0 0 4 * * *", +//! "timezone": null, +//! "field_groups": ["ratings", "status", "counts"], +//! "extra_fields": [], +//! "providers": [], +//! "existing_source_ids_only": true, +//! "skip_recently_synced_within_s": 3600, +//! "max_concurrency": 4, +//! "per_provider_overrides": null +//! } +//! ``` +//! +//! All fields have safe defaults so a partial document round-trips cleanly. + +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; + +/// Default cron schedule: every day at 04:00 (server timezone). +/// +/// 6-field cron expression matching the syntax used by `tokio_cron_scheduler` +/// for the rest of the codebase. +pub const DEFAULT_CRON_SCHEDULE: &str = "0 0 4 * * *"; + +/// Default safety window: skip series whose external IDs were synced within +/// this many seconds. 1 hour is a reasonable floor for daily cadences. +pub const DEFAULT_SKIP_RECENTLY_SYNCED_SECS: u32 = 3600; + +/// Default fan-out per task. Conservative because providers may share rate +/// limits per host. +pub const DEFAULT_MAX_CONCURRENCY: u8 = 4; + +/// Per-library scheduled metadata-refresh configuration. +/// +/// Stored as JSON. Use [`Self::merge_partial`] to apply a PATCH body without +/// clobbering unspecified fields. 
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct MetadataRefreshConfig { + /// Whether the schedule fires automatically. When `false`, the scheduler + /// has no entry for this library; manual `run-now` still works. + #[serde(default)] + pub enabled: bool, + + /// 6-field cron expression. Defaults to [`DEFAULT_CRON_SCHEDULE`]. + #[serde(default = "default_cron_schedule")] + pub cron_schedule: String, + + /// Optional IANA timezone name (e.g. `"Europe/Paris"`). When `None`, the + /// scheduler falls back to the server timezone. + #[serde(default)] + pub timezone: Option, + + /// User-facing field groups to refresh. Translated to a concrete field + /// list by `field_groups::fields_for_groups` in Phase 3. + #[serde(default = "default_field_groups")] + pub field_groups: Vec, + + /// Extra individual field names not covered by any group. Power-user + /// hatch; usually empty. + #[serde(default)] + pub extra_fields: Vec, + + /// Plugin IDs (e.g. `"plugin:mangabaka"`) to query. An empty list means + /// "no providers configured" — the task short-circuits. + #[serde(default)] + pub providers: Vec, + + /// When true, the planner skips any series without a stored external ID + /// for the chosen provider. Default `true` keeps user-curated matches + /// safe from re-matching. + #[serde(default = "default_existing_source_ids_only")] + pub existing_source_ids_only: bool, + + /// Skip series whose `series_external_ids.last_synced_at` is younger + /// than this many seconds. `0` disables the guard. + #[serde(default = "default_skip_recently_synced")] + pub skip_recently_synced_within_s: u32, + + /// Per-task fan-out. Bounded `JoinSet` size in Phase 2. + #[serde(default = "default_max_concurrency")] + pub max_concurrency: u8, + + /// Phase 8 hatch: per-provider field allowlist overrides. `None` until + /// Phase 8 lands; preserved through round-trips. 
+ #[serde(default)] + pub per_provider_overrides: Option>, +} + +/// Per-provider override placeholder for Phase 8. Kept here so the config +/// schema is forward-compatible without future migrations. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct ProviderOverride { + #[serde(default)] + pub field_groups: Vec, + #[serde(default)] + pub extra_fields: Vec, +} + +fn default_cron_schedule() -> String { + DEFAULT_CRON_SCHEDULE.to_string() +} + +fn default_field_groups() -> Vec { + vec![ + "ratings".to_string(), + "status".to_string(), + "counts".to_string(), + ] +} + +fn default_existing_source_ids_only() -> bool { + true +} + +fn default_skip_recently_synced() -> u32 { + DEFAULT_SKIP_RECENTLY_SYNCED_SECS +} + +fn default_max_concurrency() -> u8 { + DEFAULT_MAX_CONCURRENCY +} + +impl Default for MetadataRefreshConfig { + fn default() -> Self { + Self { + enabled: false, + cron_schedule: default_cron_schedule(), + timezone: None, + field_groups: default_field_groups(), + extra_fields: Vec::new(), + providers: Vec::new(), + existing_source_ids_only: default_existing_source_ids_only(), + skip_recently_synced_within_s: default_skip_recently_synced(), + max_concurrency: default_max_concurrency(), + per_provider_overrides: None, + } + } +} + +/// Partial PATCH body for the refresh-config CRUD endpoint (Phase 6). +/// +/// `Option<Option<T>>` semantics: +/// - `None` (`#[serde(default)]`): field absent from the body, do not touch. +/// - `Some(None)`: explicitly clear/null the field. +/// - `Some(Some(x))`: set the field to `x`. +/// +/// Fields that are intrinsically non-nullable (`enabled`, `cron_schedule`, +/// numeric counters) use plain `Option` because clearing them doesn't make +/// sense. `timezone` and `per_provider_overrides` use the double-Option form +/// since they are nullable in storage. +/// +/// Consumed by the Phase 6 PATCH endpoint; allowed to be unused until then. 
+#[allow(dead_code)] +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", default)] +pub struct MetadataRefreshConfigPatch { + pub enabled: Option, + pub cron_schedule: Option, + #[serde(default, deserialize_with = "deserialize_some")] + pub timezone: Option>, + pub field_groups: Option>, + pub extra_fields: Option>, + pub providers: Option>, + pub existing_source_ids_only: Option, + pub skip_recently_synced_within_s: Option, + pub max_concurrency: Option, + #[serde(default, deserialize_with = "deserialize_some")] + pub per_provider_overrides: Option>>, +} + +/// Distinguish "field absent" from "field present with value null" during +/// deserialization. Without this, serde collapses both to `None`. +#[allow(dead_code)] +fn deserialize_some<'de, T, D>(deserializer: D) -> Result, D::Error> +where + T: Deserialize<'de>, + D: serde::Deserializer<'de>, +{ + Deserialize::deserialize(deserializer).map(Some) +} + +impl MetadataRefreshConfig { + /// Apply a PATCH body, leaving fields not mentioned by the patch alone. + /// + /// Consumed by the Phase 6 PATCH endpoint; allowed to be unused until then. 
+ #[allow(dead_code)] + pub fn merge_partial(&mut self, patch: MetadataRefreshConfigPatch) { + if let Some(v) = patch.enabled { + self.enabled = v; + } + if let Some(v) = patch.cron_schedule { + self.cron_schedule = v; + } + if let Some(v) = patch.timezone { + self.timezone = v; + } + if let Some(v) = patch.field_groups { + self.field_groups = v; + } + if let Some(v) = patch.extra_fields { + self.extra_fields = v; + } + if let Some(v) = patch.providers { + self.providers = v; + } + if let Some(v) = patch.existing_source_ids_only { + self.existing_source_ids_only = v; + } + if let Some(v) = patch.skip_recently_synced_within_s { + self.skip_recently_synced_within_s = v; + } + if let Some(v) = patch.max_concurrency { + self.max_concurrency = v; + } + if let Some(v) = patch.per_provider_overrides { + self.per_provider_overrides = v; + } + } +} + +/// Parse a `MetadataRefreshConfig` from the JSON string stored in +/// `libraries.metadata_refresh_config`. +/// +/// Returns `Default::default()` for `None` or empty input. Surfaces parse +/// errors as `Err(String)` for the caller to log; callers usually fall back +/// to the default rather than failing the request. 
+pub fn parse_metadata_refresh_config(json: Option<&str>) -> Result { + match json { + None => Ok(MetadataRefreshConfig::default()), + Some(s) if s.trim().is_empty() => Ok(MetadataRefreshConfig::default()), + Some(s) => serde_json::from_str(s) + .map_err(|e| format!("Failed to parse metadata refresh config: {}", e)), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_is_disabled_with_safe_values() { + let cfg = MetadataRefreshConfig::default(); + assert!(!cfg.enabled); + assert_eq!(cfg.cron_schedule, DEFAULT_CRON_SCHEDULE); + assert!(cfg.timezone.is_none()); + assert_eq!(cfg.field_groups, vec!["ratings", "status", "counts"]); + assert!(cfg.extra_fields.is_empty()); + assert!(cfg.providers.is_empty()); + assert!(cfg.existing_source_ids_only); + assert_eq!( + cfg.skip_recently_synced_within_s, + DEFAULT_SKIP_RECENTLY_SYNCED_SECS + ); + assert_eq!(cfg.max_concurrency, DEFAULT_MAX_CONCURRENCY); + assert!(cfg.per_provider_overrides.is_none()); + } + + #[test] + fn serde_round_trip_full() { + let mut overrides = BTreeMap::new(); + overrides.insert( + "plugin:anilist".to_string(), + ProviderOverride { + field_groups: vec!["ratings".to_string()], + extra_fields: vec!["coverUrl".to_string()], + }, + ); + let cfg = MetadataRefreshConfig { + enabled: true, + cron_schedule: "0 30 2 * * *".to_string(), + timezone: Some("Europe/Paris".to_string()), + field_groups: vec!["ratings".to_string(), "counts".to_string()], + extra_fields: vec!["language".to_string()], + providers: vec!["plugin:mangabaka".to_string(), "plugin:anilist".to_string()], + existing_source_ids_only: false, + skip_recently_synced_within_s: 7200, + max_concurrency: 8, + per_provider_overrides: Some(overrides), + }; + + let json = serde_json::to_string(&cfg).unwrap(); + let parsed: MetadataRefreshConfig = serde_json::from_str(&json).unwrap(); + assert_eq!(cfg, parsed); + } + + #[test] + fn serde_partial_json_uses_defaults_for_missing_fields() { + let json = r#"{"enabled": true, 
"cron_schedule": "0 0 5 * * *"}"#; + let cfg: MetadataRefreshConfig = serde_json::from_str(json).unwrap(); + assert!(cfg.enabled); + assert_eq!(cfg.cron_schedule, "0 0 5 * * *"); + // All other fields fall back to defaults + assert!(cfg.existing_source_ids_only); + assert_eq!(cfg.field_groups, vec!["ratings", "status", "counts"]); + assert_eq!(cfg.max_concurrency, DEFAULT_MAX_CONCURRENCY); + } + + #[test] + fn parse_none_returns_default() { + let cfg = parse_metadata_refresh_config(None).unwrap(); + assert_eq!(cfg, MetadataRefreshConfig::default()); + } + + #[test] + fn parse_empty_string_returns_default() { + let cfg = parse_metadata_refresh_config(Some("")).unwrap(); + assert_eq!(cfg, MetadataRefreshConfig::default()); + let cfg = parse_metadata_refresh_config(Some(" ")).unwrap(); + assert_eq!(cfg, MetadataRefreshConfig::default()); + } + + #[test] + fn parse_invalid_json_errors() { + let err = parse_metadata_refresh_config(Some("not json")).unwrap_err(); + assert!(err.contains("Failed to parse metadata refresh config")); + } + + #[test] + fn parse_valid_json_round_trip() { + let json = r#"{ + "enabled": true, + "cron_schedule": "0 0 6 * * *", + "timezone": "UTC", + "field_groups": ["ratings"], + "providers": ["plugin:mangabaka"], + "existing_source_ids_only": false, + "skip_recently_synced_within_s": 0, + "max_concurrency": 2 + }"#; + let cfg = parse_metadata_refresh_config(Some(json)).unwrap(); + assert!(cfg.enabled); + assert_eq!(cfg.timezone.as_deref(), Some("UTC")); + assert_eq!(cfg.field_groups, vec!["ratings"]); + assert_eq!(cfg.providers, vec!["plugin:mangabaka"]); + assert!(!cfg.existing_source_ids_only); + assert_eq!(cfg.skip_recently_synced_within_s, 0); + assert_eq!(cfg.max_concurrency, 2); + } + + #[test] + fn merge_partial_overwrites_only_present_fields() { + let mut cfg = MetadataRefreshConfig::default(); + let original_cron = cfg.cron_schedule.clone(); + + let patch = MetadataRefreshConfigPatch { + enabled: Some(true), + field_groups: 
Some(vec!["ratings".to_string()]), + ..Default::default() + }; + cfg.merge_partial(patch); + + assert!(cfg.enabled); + assert_eq!(cfg.field_groups, vec!["ratings"]); + // Untouched fields keep their prior values. + assert_eq!(cfg.cron_schedule, original_cron); + assert!(cfg.existing_source_ids_only); + } + + #[test] + fn merge_partial_clears_nullable_fields() { + let mut cfg = MetadataRefreshConfig { + timezone: Some("Europe/Paris".to_string()), + ..Default::default() + }; + + // Explicit null clears the field + let patch: MetadataRefreshConfigPatch = + serde_json::from_str(r#"{"timezone": null}"#).unwrap(); + cfg.merge_partial(patch); + assert!(cfg.timezone.is_none()); + } + + #[test] + fn merge_partial_absent_field_preserves_value() { + let mut cfg = MetadataRefreshConfig { + timezone: Some("UTC".to_string()), + ..Default::default() + }; + + // Absent field should leave timezone untouched + let patch: MetadataRefreshConfigPatch = + serde_json::from_str(r#"{"enabled": true}"#).unwrap(); + cfg.merge_partial(patch); + assert_eq!(cfg.timezone.as_deref(), Some("UTC")); + assert!(cfg.enabled); + } + + #[test] + fn merge_partial_sets_then_clears_per_provider_overrides() { + let mut cfg = MetadataRefreshConfig::default(); + + // Set + let patch: MetadataRefreshConfigPatch = serde_json::from_str( + r#"{"per_provider_overrides": {"plugin:anilist": {"field_groups": ["ratings"]}}}"#, + ) + .unwrap(); + cfg.merge_partial(patch); + assert!(cfg.per_provider_overrides.is_some()); + + // Clear via explicit null + let patch: MetadataRefreshConfigPatch = + serde_json::from_str(r#"{"per_provider_overrides": null}"#).unwrap(); + cfg.merge_partial(patch); + assert!(cfg.per_provider_overrides.is_none()); + } +} From 37564123c8562e8f9d5542a663999bcca125f6e2 Mon Sep 17 00:00:00 2001 From: Sylvain Cau Date: Sun, 3 May 2026 11:00:58 -0700 Subject: [PATCH 02/12] feat(metadata): add scheduled per-library refresh task type and handler Wire the work behind the scheduled metadata refresh: a new 
RefreshLibraryMetadata task variant, a stateless RefreshPlanner that decides which (series, provider) pairs to touch, and a worker handler that fetches per-series metadata from plugins and applies it through the existing MetadataApplier. The planner resolves "plugin:<name>" provider strings to enabled plugins (unresolved entries surface as plan-level skips), batches external-id lookups, and emits typed skip reasons (NoExternalId, RecentlySynced, ProviderUnavailable). The handler walks the plan sequentially with a per-pair timeout, isolates per-series errors via a typed PairError, bumps series_external_ids.last_synced_at after each successful apply so the recency guard works on the next run, and returns a stable RunSummary JSON. TaskProgressEvent updates are emitted per pair so the frontend can show live progress. The handler is registered in the worker alongside PluginAutoMatch and inherits the optional ThumbnailService for cover updates. Loose-mode re-matching is intentionally deferred until a MatchingStrategy enum lands; the current behavior counts those pairs as skipped_no_external_id. Tests cover the planner's filter combinations and the handler's short-circuit, error, and skip paths. 
--- src/services/metadata/mod.rs | 6 + src/services/metadata/refresh_planner.rs | 595 +++++++++++++++ src/tasks/handlers/mod.rs | 2 + .../handlers/refresh_library_metadata.rs | 702 ++++++++++++++++++ src/tasks/types.rs | 49 ++ src/tasks/worker.rs | 16 +- 6 files changed, 1368 insertions(+), 2 deletions(-) create mode 100644 src/services/metadata/refresh_planner.rs create mode 100644 src/tasks/handlers/refresh_library_metadata.rs diff --git a/src/services/metadata/mod.rs b/src/services/metadata/mod.rs index 25c70e12..28c8f0cc 100644 --- a/src/services/metadata/mod.rs +++ b/src/services/metadata/mod.rs @@ -12,6 +12,7 @@ mod book_apply; mod cover; pub mod preprocessing; pub mod refresh_config; +pub mod refresh_planner; pub use apply::{ApplyOptions, MetadataApplier, SkippedField}; pub use book_apply::{BookApplyOptions, BookMetadataApplier}; @@ -21,3 +22,8 @@ pub use refresh_config::{MetadataRefreshConfig, parse_metadata_refresh_config}; // overrides). Allowed to be unused until those phases land. #[allow(unused_imports)] pub use refresh_config::{MetadataRefreshConfigPatch, ProviderOverride}; +#[allow(unused_imports)] +pub use refresh_planner::{ + PlannedRefresh, RefreshPlan, RefreshPlanner, SkipReason as RefreshSkipReason, SkippedRefresh, + fields_filter_from_config, +}; diff --git a/src/services/metadata/refresh_planner.rs b/src/services/metadata/refresh_planner.rs new file mode 100644 index 00000000..da405c1f --- /dev/null +++ b/src/services/metadata/refresh_planner.rs @@ -0,0 +1,595 @@ +//! Planner that decides which `(series, provider)` pairs the scheduled +//! metadata refresh should touch in a given run. +//! +//! The planner is intentionally side-effect free: it queries series and +//! external-id state and returns a deterministic plan. The task handler is +//! responsible for actually fetching from plugins and applying metadata. +//! +//! ## Filters +//! +//! - **Provider resolution**: config stores `"plugin:<name>"` strings. Only +//! 
providers that resolve to an enabled plugin contribute to the plan; +//! missing/disabled providers are recorded as plan-level skips. +//! - **`existing_source_ids_only`**: skip series with no +//! `series_external_ids` row for the resolved provider. +//! - **`skip_recently_synced_within_s`**: skip series whose +//! `last_synced_at` for the provider is younger than the cutoff. +//! +//! Phase 4 will add an explicit `MatchingStrategy` enum that callers (manual +//! API vs scheduled task) can override; for Phase 2 the planner uses the +//! library's `existing_source_ids_only` toggle directly. + +#![allow(dead_code)] + +use anyhow::{Context, Result}; +use chrono::{DateTime, Duration, Utc}; +use sea_orm::DatabaseConnection; +use std::collections::{HashMap, HashSet}; +use uuid::Uuid; + +use crate::db::entities::plugins::Model as Plugin; +use crate::db::entities::series_external_ids::{self, Model as SeriesExternalId}; +use crate::db::repositories::{PluginsRepository, SeriesExternalIdRepository, SeriesRepository}; + +use super::refresh_config::MetadataRefreshConfig; + +/// Reason a series was skipped during planning. Mirrors the future +/// `RefreshSkipReason` taxonomy from Phase 4 closely enough that they can +/// be unified later without breaking external callers. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SkipReason { + /// Provider config references a plugin that isn't installed/enabled. + ProviderUnavailable { provider: String }, + /// `existing_source_ids_only = true` and series has no external ID for the provider. + NoExternalId, + /// `last_synced_at` is younger than `skip_recently_synced_within_s`. + RecentlySynced { last_synced_at: DateTime }, +} + +impl SkipReason { + pub fn as_str(&self) -> &'static str { + match self { + SkipReason::ProviderUnavailable { .. } => "provider_unavailable", + SkipReason::NoExternalId => "no_external_id", + SkipReason::RecentlySynced { .. 
} => "recently_synced", + } + } +} + +/// One planned `(series_id, plugin)` pair plus the optional pre-fetched +/// external ID. Carrying the external ID through avoids a second DB lookup +/// in the task handler when it dispatches `metadata/series/get`. +#[derive(Debug, Clone)] +pub struct PlannedRefresh { + pub series_id: Uuid, + pub plugin: Plugin, + /// Pre-fetched external ID for this series + plugin, if any. + pub existing_external_id: Option, +} + +/// One series that was considered but skipped, with the reason. Surfaced so +/// the task handler can record per-reason counts in the task summary. +#[derive(Debug, Clone)] +pub struct SkippedRefresh { + pub series_id: Uuid, + pub provider: String, + pub reason: SkipReason, +} + +/// Output of [`RefreshPlanner::plan`]. +#[derive(Debug, Default)] +pub struct RefreshPlan { + /// Refreshes that should actually run. + pub planned: Vec, + /// Per-`(series, provider)` skips with reasons. + pub skipped: Vec, + /// Provider strings from the config that don't resolve to an enabled + /// plugin. Recorded once (not per series). Useful for warning the user + /// in the task summary. + pub unresolved_providers: Vec, +} + +impl RefreshPlan { + /// Total work units = number of planned `(series, plugin)` invocations. + pub fn total_work(&self) -> usize { + self.planned.len() + } + + /// Skip count grouped by reason key. Used by the task handler to surface + /// a structured summary (e.g. "skipped: 12 no_external_id, 3 recently_synced"). + pub fn skipped_by_reason(&self) -> HashMap<&'static str, usize> { + let mut out: HashMap<&'static str, usize> = HashMap::new(); + for s in &self.skipped { + *out.entry(s.reason.as_str()).or_insert(0) += 1; + } + out + } +} + +/// Stateless planner. All state is passed in per-call. +pub struct RefreshPlanner; + +impl RefreshPlanner { + /// Build a refresh plan for `library_id` against `config`. + /// + /// The planner: + /// 1. 
Resolves each `plugin:<name>` provider string to an enabled plugin + /// via [`PluginsRepository::get_by_name`]. Missing or disabled + /// providers go into `unresolved_providers`. + /// 2. Lists every series in the library. + /// 3. Fetches all external IDs for those series in one query + /// (`get_for_series_ids`). + /// 4. For each `(series, resolved_provider)` pair, decides: + /// - `existing_source_ids_only` ⇒ skip when no external ID for that + /// provider exists. + /// - `skip_recently_synced_within_s` ⇒ skip when the provider's + /// `last_synced_at` is too recent. + /// - otherwise plan the refresh. + pub async fn plan( + db: &DatabaseConnection, + library_id: Uuid, + config: &MetadataRefreshConfig, + ) -> Result { + let mut plan = RefreshPlan::default(); + + if config.providers.is_empty() { + return Ok(plan); + } + + // 1. Resolve providers — convert `"plugin:<name>"` strings to + // `Plugin` models, recording unresolved entries. + let mut resolved_providers: Vec<(String, Plugin)> = Vec::new(); + for provider in &config.providers { + match resolve_provider(db, provider).await { + Ok(Some(plugin)) => resolved_providers.push((provider.clone(), plugin)), + Ok(None) => plan.unresolved_providers.push(provider.clone()), + Err(e) => { + return Err(e.context(format!("Failed to resolve provider '{}'", provider))); + } + } + } + + if resolved_providers.is_empty() { + return Ok(plan); + } + + // 2. List series in the library. + let series_list = SeriesRepository::list_by_library(db, library_id) + .await + .context("Failed to list series for refresh planning")?; + if series_list.is_empty() { + return Ok(plan); + } + let series_ids: Vec = series_list.iter().map(|s| s.id).collect(); + + // 3. Fetch all external IDs in a single batched query. + let external_ids_by_series: HashMap> = + SeriesExternalIdRepository::get_for_series_ids(db, &series_ids) + .await + .context("Failed to load external IDs for refresh planning")?; + + // Pre-compute the recency cutoff once.
+ let recently_synced_cutoff: Option> = if config.skip_recently_synced_within_s + == 0 + { + None + } else { + Some(Utc::now() - Duration::seconds(i64::from(config.skip_recently_synced_within_s))) + }; + + // 4. For each (series, provider) pair, decide. + for series in &series_ids { + let series_externals = external_ids_by_series.get(series); + for (provider_str, plugin) in &resolved_providers { + let plugin_source = series_external_ids::Model::plugin_source(&plugin.name); + let existing = series_externals + .and_then(|list| list.iter().find(|e| e.source == plugin_source).cloned()); + + if config.existing_source_ids_only && existing.is_none() { + plan.skipped.push(SkippedRefresh { + series_id: *series, + provider: provider_str.clone(), + reason: SkipReason::NoExternalId, + }); + continue; + } + + if let (Some(cutoff), Some(ext)) = (recently_synced_cutoff, existing.as_ref()) + && let Some(last_synced_at) = ext.last_synced_at + && last_synced_at >= cutoff + { + plan.skipped.push(SkippedRefresh { + series_id: *series, + provider: provider_str.clone(), + reason: SkipReason::RecentlySynced { last_synced_at }, + }); + continue; + } + + plan.planned.push(PlannedRefresh { + series_id: *series, + plugin: plugin.clone(), + existing_external_id: existing, + }); + } + } + + Ok(plan) + } +} + +/// Resolve a `"plugin:<name>"` string to an enabled plugin. +/// +/// Returns: +/// - `Ok(Some(plugin))` if the string parses and an enabled plugin exists. +/// - `Ok(None)` if the prefix is missing, the plugin doesn't exist, or the +/// plugin exists but is disabled. Caller records this as a plan-level +/// `unresolved_providers` entry. +/// - `Err(_)` only on DB errors.
+async fn resolve_provider(db: &DatabaseConnection, provider: &str) -> Result> { + let Some(name) = provider.strip_prefix("plugin:") else { + return Ok(None); + }; + let plugin = PluginsRepository::get_by_name(db, name).await?; + Ok(plugin.filter(|p| p.enabled)) +} + +/// Convenience: dedup + flatten the field allowlist that the task handler +/// will pass to `MetadataApplier::apply` via `ApplyOptions::fields_filter`. +/// +/// Phase 3 will add a real `field_groups::fields_for_groups()` resolver. For +/// Phase 2 we only need the union of `extra_fields` plus a (currently +/// stubbed) per-group expansion that returns each group name as-is — this +/// preserves the full set of fields the user picked so the handler doesn't +/// over-write fields outside the allowlist. +/// +/// Returns `None` when both lists are empty (apply everything; existing +/// `MetadataApplier` semantics). +pub fn fields_filter_from_config(config: &MetadataRefreshConfig) -> Option> { + if config.field_groups.is_empty() && config.extra_fields.is_empty() { + return None; + } + let mut out: HashSet = HashSet::new(); + out.extend(config.field_groups.iter().cloned()); + out.extend(config.extra_fields.iter().cloned()); + Some(out) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::ScanningStrategy; + use crate::db::entities::plugins::PluginPermission; + use crate::db::repositories::{LibraryRepository, PluginsRepository, SeriesRepository}; + use crate::db::test_helpers::setup_test_db; + use crate::services::metadata::refresh_config::MetadataRefreshConfig; + use crate::services::plugin::protocol::PluginScope; + use std::env; + use std::sync::Once; + + static INIT_ENCRYPTION: Once = Once::new(); + + fn setup_test_encryption_key() { + INIT_ENCRYPTION.call_once(|| { + if env::var("CODEX_ENCRYPTION_KEY").is_err() { + // SAFETY: tests run with shared env access; first writer wins. 
+ unsafe { + env::set_var( + "CODEX_ENCRYPTION_KEY", + "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8=", + ); + } + } + }); + } + + async fn create_library(db: &DatabaseConnection, name: &str) -> Uuid { + let lib = LibraryRepository::create( + db, + name, + &format!("/tmp/{}", name), + ScanningStrategy::Default, + ) + .await + .unwrap(); + lib.id + } + + async fn create_series(db: &DatabaseConnection, library_id: Uuid, name: &str) -> Uuid { + SeriesRepository::create(db, library_id, name, None) + .await + .unwrap() + .id + } + + async fn create_plugin(db: &DatabaseConnection, name: &str, enabled: bool) -> Plugin { + setup_test_encryption_key(); + PluginsRepository::create( + db, + name, + name, + None, + "system", + "node", + vec!["dist/index.js".to_string()], + vec![], + None, + vec![PluginPermission::MetadataWriteSummary], + vec![PluginScope::SeriesDetail], + vec![], + None, + "env", + None, + enabled, + None, + None, + ) + .await + .unwrap() + } + + fn config_with_provider(provider: &str) -> MetadataRefreshConfig { + MetadataRefreshConfig { + providers: vec![provider.to_string()], + ..Default::default() + } + } + + #[tokio::test] + async fn plan_is_empty_when_no_providers_configured() { + let db = setup_test_db().await; + let library_id = create_library(&db, "lib").await; + let _series_id = create_series(&db, library_id, "Series A").await; + + let cfg = MetadataRefreshConfig::default(); + let plan = RefreshPlanner::plan(&db, library_id, &cfg).await.unwrap(); + + assert!(plan.planned.is_empty()); + assert!(plan.skipped.is_empty()); + assert!(plan.unresolved_providers.is_empty()); + } + + #[tokio::test] + async fn plan_records_unresolved_provider_for_missing_plugin() { + let db = setup_test_db().await; + let library_id = create_library(&db, "lib").await; + let _series = create_series(&db, library_id, "Series A").await; + + let cfg = config_with_provider("plugin:does-not-exist"); + let plan = RefreshPlanner::plan(&db, library_id, &cfg).await.unwrap(); + + 
assert!(plan.planned.is_empty()); + assert!(plan.skipped.is_empty()); + assert_eq!(plan.unresolved_providers, vec!["plugin:does-not-exist"]); + } + + #[tokio::test] + async fn plan_skips_disabled_plugin() { + let db = setup_test_db().await; + let library_id = create_library(&db, "lib").await; + let _series = create_series(&db, library_id, "Series A").await; + let _plugin = create_plugin(&db, "mangabaka", false).await; + + let cfg = config_with_provider("plugin:mangabaka"); + let plan = RefreshPlanner::plan(&db, library_id, &cfg).await.unwrap(); + + assert!(plan.planned.is_empty()); + assert_eq!(plan.unresolved_providers, vec!["plugin:mangabaka"]); + } + + #[tokio::test] + async fn plan_strict_mode_skips_series_without_external_id() { + let db = setup_test_db().await; + let library_id = create_library(&db, "lib").await; + let series_id = create_series(&db, library_id, "Series A").await; + let _plugin = create_plugin(&db, "mangabaka", true).await; + + let cfg = MetadataRefreshConfig { + providers: vec!["plugin:mangabaka".to_string()], + existing_source_ids_only: true, + skip_recently_synced_within_s: 0, + ..Default::default() + }; + let plan = RefreshPlanner::plan(&db, library_id, &cfg).await.unwrap(); + + assert!(plan.planned.is_empty()); + assert_eq!(plan.skipped.len(), 1); + assert_eq!(plan.skipped[0].series_id, series_id); + assert_eq!(plan.skipped[0].reason, SkipReason::NoExternalId); + } + + #[tokio::test] + async fn plan_strict_mode_includes_series_with_external_id() { + let db = setup_test_db().await; + let library_id = create_library(&db, "lib").await; + let series_id = create_series(&db, library_id, "Series A").await; + let _plugin = create_plugin(&db, "mangabaka", true).await; + + // Seed an external ID for plugin:mangabaka + SeriesExternalIdRepository::upsert_for_plugin( + &db, + series_id, + "mangabaka", + "ext-1", + None, + None, + ) + .await + .unwrap(); + + let cfg = MetadataRefreshConfig { + providers: vec!["plugin:mangabaka".to_string()], + 
existing_source_ids_only: true, + skip_recently_synced_within_s: 0, + ..Default::default() + }; + let plan = RefreshPlanner::plan(&db, library_id, &cfg).await.unwrap(); + + assert_eq!(plan.planned.len(), 1); + assert_eq!(plan.planned[0].series_id, series_id); + assert_eq!(plan.planned[0].plugin.name, "mangabaka"); + assert!(plan.planned[0].existing_external_id.is_some()); + assert!(plan.skipped.is_empty()); + } + + #[tokio::test] + async fn plan_loose_mode_includes_unmatched_series() { + let db = setup_test_db().await; + let library_id = create_library(&db, "lib").await; + let series_id = create_series(&db, library_id, "Series A").await; + let _plugin = create_plugin(&db, "mangabaka", true).await; + + let cfg = MetadataRefreshConfig { + providers: vec!["plugin:mangabaka".to_string()], + existing_source_ids_only: false, + skip_recently_synced_within_s: 0, + ..Default::default() + }; + let plan = RefreshPlanner::plan(&db, library_id, &cfg).await.unwrap(); + + assert_eq!(plan.planned.len(), 1); + assert_eq!(plan.planned[0].series_id, series_id); + assert!(plan.planned[0].existing_external_id.is_none()); + } + + #[tokio::test] + async fn plan_skips_recently_synced_series() { + let db = setup_test_db().await; + let library_id = create_library(&db, "lib").await; + let series_id = create_series(&db, library_id, "Series A").await; + let _plugin = create_plugin(&db, "mangabaka", true).await; + + // Seeded `last_synced_at` defaults to "now", i.e. inside the 1h recency window + SeriesExternalIdRepository::upsert_for_plugin( + &db, + series_id, + "mangabaka", + "ext-1", + None, + None, + ) + .await + .unwrap(); + + let cfg = MetadataRefreshConfig { + providers: vec!["plugin:mangabaka".to_string()], + existing_source_ids_only: true, + skip_recently_synced_within_s: 3600, // 1h + ..Default::default() + }; + let plan = RefreshPlanner::plan(&db, library_id, &cfg).await.unwrap(); + + assert!(plan.planned.is_empty()); + assert_eq!(plan.skipped.len(), 1); + assert!(matches!( +
plan.skipped[0].reason, + SkipReason::RecentlySynced { .. } + )); + } + + #[tokio::test] + async fn plan_includes_when_recency_guard_disabled() { + let db = setup_test_db().await; + let library_id = create_library(&db, "lib").await; + let series_id = create_series(&db, library_id, "Series A").await; + let _plugin = create_plugin(&db, "mangabaka", true).await; + + SeriesExternalIdRepository::upsert_for_plugin( + &db, + series_id, + "mangabaka", + "ext-1", + None, + None, + ) + .await + .unwrap(); + + let cfg = MetadataRefreshConfig { + providers: vec!["plugin:mangabaka".to_string()], + existing_source_ids_only: true, + skip_recently_synced_within_s: 0, + ..Default::default() + }; + let plan = RefreshPlanner::plan(&db, library_id, &cfg).await.unwrap(); + + assert_eq!(plan.planned.len(), 1); + assert!(plan.skipped.is_empty()); + } + + #[tokio::test] + async fn plan_handles_multiple_providers_independently() { + let db = setup_test_db().await; + let library_id = create_library(&db, "lib").await; + let series_id = create_series(&db, library_id, "Series A").await; + let _p1 = create_plugin(&db, "mangabaka", true).await; + let _p2 = create_plugin(&db, "anilist", true).await; + + // Series matched on mangabaka but not anilist + SeriesExternalIdRepository::upsert_for_plugin( + &db, + series_id, + "mangabaka", + "ext-1", + None, + None, + ) + .await + .unwrap(); + + let cfg = MetadataRefreshConfig { + providers: vec!["plugin:mangabaka".to_string(), "plugin:anilist".to_string()], + existing_source_ids_only: true, + skip_recently_synced_within_s: 0, + ..Default::default() + }; + let plan = RefreshPlanner::plan(&db, library_id, &cfg).await.unwrap(); + + assert_eq!(plan.planned.len(), 1); + assert_eq!(plan.planned[0].plugin.name, "mangabaka"); + assert_eq!(plan.skipped.len(), 1); + assert_eq!(plan.skipped[0].provider, "plugin:anilist"); + assert_eq!(plan.skipped[0].reason, SkipReason::NoExternalId); + + let counts = plan.skipped_by_reason(); + 
assert_eq!(counts.get("no_external_id").copied(), Some(1)); + } + + #[tokio::test] + async fn plan_empty_library_returns_empty_plan() { + let db = setup_test_db().await; + let library_id = create_library(&db, "lib").await; + let _plugin = create_plugin(&db, "mangabaka", true).await; + + let cfg = config_with_provider("plugin:mangabaka"); + let plan = RefreshPlanner::plan(&db, library_id, &cfg).await.unwrap(); + + assert!(plan.planned.is_empty()); + assert!(plan.skipped.is_empty()); + assert!(plan.unresolved_providers.is_empty()); + } + + #[test] + fn fields_filter_returns_none_when_no_groups_or_extras() { + let cfg = MetadataRefreshConfig { + field_groups: vec![], + extra_fields: vec![], + ..Default::default() + }; + assert!(fields_filter_from_config(&cfg).is_none()); + } + + #[test] + fn fields_filter_combines_groups_and_extras() { + let cfg = MetadataRefreshConfig { + field_groups: vec!["ratings".to_string(), "status".to_string()], + extra_fields: vec!["language".to_string()], + ..Default::default() + }; + let filter = fields_filter_from_config(&cfg).unwrap(); + assert!(filter.contains("ratings")); + assert!(filter.contains("status")); + assert!(filter.contains("language")); + assert_eq!(filter.len(), 3); + } +} diff --git a/src/tasks/handlers/mod.rs b/src/tasks/handlers/mod.rs index c0720e06..edd6b5d6 100644 --- a/src/tasks/handlers/mod.rs +++ b/src/tasks/handlers/mod.rs @@ -22,6 +22,7 @@ pub mod generate_thumbnail; pub mod generate_thumbnails; pub mod plugin_auto_match; pub mod purge_deleted; +pub mod refresh_library_metadata; pub mod renumber_series; pub mod reprocess_series_titles; pub mod scan_library; @@ -45,6 +46,7 @@ pub use generate_thumbnail::GenerateThumbnailHandler; pub use generate_thumbnails::GenerateThumbnailsHandler; pub use plugin_auto_match::PluginAutoMatchHandler; pub use purge_deleted::PurgeDeletedHandler; +pub use refresh_library_metadata::RefreshLibraryMetadataHandler; pub use renumber_series::{RenumberSeriesBatchHandler, 
RenumberSeriesHandler}; pub use reprocess_series_titles::{ReprocessSeriesTitleHandler, ReprocessSeriesTitlesHandler}; pub use scan_library::ScanLibraryHandler; diff --git a/src/tasks/handlers/refresh_library_metadata.rs b/src/tasks/handlers/refresh_library_metadata.rs new file mode 100644 index 00000000..4c29eee9 --- /dev/null +++ b/src/tasks/handlers/refresh_library_metadata.rs @@ -0,0 +1,702 @@ +//! Scheduled per-library metadata refresh handler. +//! +//! Reads `libraries.metadata_refresh_config`, builds a [`RefreshPlan`] via +//! [`RefreshPlanner`], then walks the plan one `(series, plugin)` pair at a +//! time. Each pair fetches metadata via the plugin's `metadata/series/get` +//! call (using the stored external ID) and applies it through the existing +//! [`MetadataApplier`]. Per-pair errors are isolated: one failure increments +//! a counter and the handler keeps going. +//! +//! ## Phase scope (Phase 2) +//! +//! - Only the **`existing_source_ids_only = true`** path is wired. The +//! [`RefreshPlanner`] does the gating, so the handler never sees a +//! no-external-id pair when strict mode is on. +//! - When strict mode is off, the planner still returns pairs without an +//! external ID. Phase 2 logs a warning and skips them — Phase 4 will +//! introduce a real `MatchingStrategy` that lets the handler call +//! `metadata/series/match` to re-match. +//! - Field-group resolution is the simple union returned by +//! [`fields_filter_from_config`]. Phase 3 ships the proper resolver that +//! expands group names like `"ratings"` to the underlying field names +//! the applier understands. 
+ +use anyhow::{Context, Result}; +use sea_orm::DatabaseConnection; +use serde_json::json; +use std::sync::Arc; +use std::time::Duration; +use tracing::{debug, error, info, warn}; + +use crate::db::entities::tasks; +use crate::db::repositories::{ + LibraryRepository, PluginsRepository, SeriesExternalIdRepository, SeriesMetadataRepository, +}; +use crate::events::{EntityChangeEvent, EntityEvent, EventBroadcaster, TaskProgressEvent}; +use crate::services::ThumbnailService; +use crate::services::metadata::refresh_planner::{ + PlannedRefresh, RefreshPlan, RefreshPlanner, fields_filter_from_config, +}; +use crate::services::metadata::{ApplyOptions, MetadataApplier}; +use crate::services::plugin::PluginManager; +use crate::services::plugin::protocol::MetadataGetParams; +use crate::tasks::handlers::TaskHandler; +use crate::tasks::types::TaskResult; + +/// Hard cap to keep one library's refresh from monopolizing the worker on +/// a misconfigured `max_concurrency`. The user can lower it; the host does +/// not allow exceeding this. +const MAX_CONCURRENCY_HARD_CAP: usize = 16; + +/// Per-`(series, provider)` plugin call timeout. Plugin manager already has +/// its own timeouts but we bound the metadata fetch here too so a slow plugin +/// can't stall the whole queue; the apply step is not covered by it. +const PER_PAIR_TIMEOUT: Duration = Duration::from_secs(60); + +/// Aggregated outcome of a single library refresh run.
+#[derive(Debug, Default)] +struct RunSummary { + succeeded: u32, + failed: u32, + skipped_no_external_id: u32, + skipped_recently_synced: u32, + skipped_provider_unavailable: u32, + fields_applied_total: u32, +} + +impl RunSummary { + fn into_json( + self, + total_planned: usize, + unresolved_providers: Vec, + ) -> serde_json::Value { + json!({ + "planned": total_planned, + "succeeded": self.succeeded, + "failed": self.failed, + "skipped": { + "no_external_id": self.skipped_no_external_id, + "recently_synced": self.skipped_recently_synced, + "provider_unavailable": self.skipped_provider_unavailable, + }, + "fields_applied_total": self.fields_applied_total, + "unresolved_providers": unresolved_providers, + }) + } +} + +/// Handler for [`crate::tasks::types::TaskType::RefreshLibraryMetadata`]. +pub struct RefreshLibraryMetadataHandler { + plugin_manager: Arc, + thumbnail_service: Option>, +} + +impl RefreshLibraryMetadataHandler { + pub fn new(plugin_manager: Arc) -> Self { + Self { + plugin_manager, + thumbnail_service: None, + } + } + + pub fn with_thumbnail_service(mut self, thumbnail_service: Arc) -> Self { + self.thumbnail_service = Some(thumbnail_service); + self + } + + /// Convert the plan's `skipped` entries into pre-counted + /// `RunSummary` fields. The planner already does the gating; the + /// handler just folds the structured reasons into counters. + fn fold_skipped_into_summary(plan: &RefreshPlan, summary: &mut RunSummary) { + for s in &plan.skipped { + match s.reason { + crate::services::metadata::refresh_planner::SkipReason::NoExternalId => { + summary.skipped_no_external_id += 1; + } + crate::services::metadata::refresh_planner::SkipReason::RecentlySynced { + .. + } => { + summary.skipped_recently_synced += 1; + } + crate::services::metadata::refresh_planner::SkipReason::ProviderUnavailable { + .. 
+ } => { + summary.skipped_provider_unavailable += 1; + } + } + } + } +} + +impl TaskHandler for RefreshLibraryMetadataHandler { + fn handle<'a>( + &'a self, + task: &'a tasks::Model, + db: &'a DatabaseConnection, + event_broadcaster: Option<&'a Arc>, + ) -> std::pin::Pin> + Send + 'a>> { + Box::pin(async move { + let library_id = task + .library_id + .ok_or_else(|| anyhow::anyhow!("Missing library_id in task"))?; + + // 1. Load library + config. Treat a missing library as a hard + // error (the scheduler shouldn't have fired in that case). + let library = LibraryRepository::get_by_id(db, library_id) + .await? + .ok_or_else(|| anyhow::anyhow!("Library not found: {}", library_id))?; + let config = LibraryRepository::get_metadata_refresh_config(db, library_id).await?; + + info!( + "Task {}: Refreshing library '{}' (id={}) — providers={:?} groups={:?} strict={}", + task.id, + library.name, + library_id, + config.providers, + config.field_groups, + config.existing_source_ids_only + ); + + // 2. Empty providers shortcut. + if config.providers.is_empty() { + return Ok(TaskResult::success_with_data( + "No providers configured; skipping", + RunSummary::default().into_json(0, Vec::new()), + )); + } + + // 3. Build the plan. 
+ let plan = RefreshPlanner::plan(db, library_id, &config) + .await + .context("Failed to build refresh plan")?; + + let total_planned = plan.total_work(); + let unresolved_providers = plan.unresolved_providers.clone(); + let mut summary = RunSummary::default(); + Self::fold_skipped_into_summary(&plan, &mut summary); + + if total_planned == 0 { + let message = if !unresolved_providers.is_empty() { + format!( + "Nothing to refresh ({} unresolved providers, {} skipped)", + unresolved_providers.len(), + plan.skipped.len() + ) + } else { + format!("Nothing to refresh ({} skipped)", plan.skipped.len()) + }; + info!("Task {}: {}", task.id, message); + return Ok(TaskResult::success_with_data( + message, + summary.into_json(total_planned, unresolved_providers), + )); + } + + // Initial progress event (current=0/total). + if let Some(broadcaster) = event_broadcaster { + let _ = broadcaster.emit_task(TaskProgressEvent::progress( + task.id, + "refresh_library_metadata", + 0, + total_planned, + Some(format!( + "Refreshing {} ({} pair(s) planned)", + library.name, total_planned + )), + Some(library_id), + None, + None, + )); + } + + // 4. Walk the plan. + // + // Phase 2 is sequential: DatabaseConnection is Send + Sync and + // PluginManager is Arc'd, so spawning per-pair tasks would also + // work, but sequential keeps progress events ordered and avoids + // interleaved plugin logs. The bounded JoinSet promised by the + // plan is deferred until we measure that daily-cadence runs + // need it. `max_concurrency` is clamped here so when the + // parallel path lands, the config value is already validated. 
+ let _max_concurrency = + (config.max_concurrency as usize).clamp(1, MAX_CONCURRENCY_HARD_CAP); + + let fields_filter = fields_filter_from_config(&config); + let library_name = library.name.clone(); + + for (idx, planned) in plan.planned.iter().enumerate() { + let pair_outcome = process_pair( + db, + library_id, + planned, + fields_filter.as_ref(), + self.thumbnail_service.as_ref(), + event_broadcaster, + self.plugin_manager.as_ref(), + ) + .await; + + match pair_outcome { + Ok(applied) => { + summary.succeeded += 1; + summary.fields_applied_total += applied as u32; + } + Err(SkipBecauseNoExternalId) => { + // Loose-mode planner returns these; Phase 4 will + // convert this into a real re-match path. + warn!( + "Task {}: Skipping series {} for plugin {} — no external ID and \ + re-matching is not yet implemented (Phase 4)", + task.id, planned.series_id, planned.plugin.name + ); + summary.skipped_no_external_id += 1; + } + Err(SkipBecauseFailed(err)) => { + summary.failed += 1; + error!( + "Task {}: Failed refresh for series {} via plugin {}: {:#}", + task.id, planned.series_id, planned.plugin.name, err + ); + } + } + + // Per-pair progress event (after the work, so `current` + // reflects what just finished). 
+ if let Some(broadcaster) = event_broadcaster { + let current = idx + 1; + let _ = broadcaster.emit_task(TaskProgressEvent::progress( + task.id, + "refresh_library_metadata", + current, + total_planned, + Some(format!( + "Refreshing {} ({}/{}, {} succeeded, {} failed)", + library_name, current, total_planned, summary.succeeded, summary.failed + )), + Some(library_id), + Some(planned.series_id), + None, + )); + } + } + + let message = format!( + "Refreshed {} of {} pair(s) ({} succeeded, {} failed, {} skipped)", + summary.succeeded, + total_planned, + summary.succeeded, + summary.failed, + summary.skipped_no_external_id + + summary.skipped_recently_synced + + summary.skipped_provider_unavailable, + ); + + Ok(TaskResult::success_with_data( + message, + summary.into_json(total_planned, unresolved_providers), + )) + }) + } +} + +/// Fine-grained outcome for a single `(series, provider)` pair. +/// +/// Wrapping the `Result` makes the handler loop self-documenting and lets +/// us distinguish "planner gave us a no-ID pair" from "the call failed". +enum PairError { + NoExternalId, + Failed(anyhow::Error), +} +use PairError::Failed as SkipBecauseFailed; +use PairError::NoExternalId as SkipBecauseNoExternalId; + +/// Process one planned pair. Returns the number of fields applied on +/// success, or a typed reason for non-success. +/// +/// This is a private free function rather than a method nested in the impl so +/// the `cargo fmt` output stays readable for a 100+ line function. +#[allow(clippy::too_many_arguments)] +async fn process_pair( + db: &DatabaseConnection, + library_id: uuid::Uuid, + planned: &PlannedRefresh, + fields_filter: Option<&std::collections::HashSet>, + thumbnail_service: Option<&Arc>, + event_broadcaster: Option<&Arc>, + plugin_manager: &PluginManager, +) -> Result { + // Phase 2: only the existing-ID path is wired.
+ let Some(external_id_record) = planned.existing_external_id.as_ref() else { + return Err(PairError::NoExternalId); + }; + let external_id = external_id_record.external_id.clone(); + let plugin = &planned.plugin; + + // 1. Fetch metadata from the plugin. + let get_params = MetadataGetParams { + external_id: external_id.clone(), + }; + + let metadata_fut = plugin_manager.get_series_metadata(plugin.id, get_params); + let plugin_metadata = match tokio::time::timeout(PER_PAIR_TIMEOUT, metadata_fut).await { + Ok(Ok(m)) => m, + Ok(Err(e)) => { + return Err(PairError::Failed(anyhow::Error::new(e).context(format!( + "Plugin '{}' failed to fetch metadata for external_id {}", + plugin.name, external_id + )))); + } + Err(_elapsed) => { + return Err(PairError::Failed(anyhow::anyhow!( + "Plugin '{}' timed out after {}s fetching external_id {}", + plugin.name, + PER_PAIR_TIMEOUT.as_secs(), + external_id + ))); + } + }; + + // 2. Apply via the shared MetadataApplier. + let current_metadata = SeriesMetadataRepository::get_by_series_id(db, planned.series_id) + .await + .map_err(|e| PairError::Failed(e.context("Failed to load current metadata")))?; + + let options = ApplyOptions { + fields_filter: fields_filter.cloned(), + thumbnail_service: thumbnail_service.cloned(), + event_broadcaster: event_broadcaster.cloned(), + }; + + let apply_result = MetadataApplier::apply( + db, + planned.series_id, + library_id, + plugin, + &plugin_metadata, + current_metadata.as_ref(), + &options, + ) + .await + .map_err(|e| { + PairError::Failed(e.context(format!( + "Failed to apply metadata to series {}", + planned.series_id + ))) + })?; + + let applied_count = apply_result.applied_fields.len(); + + // 3. Bump `last_synced_at` on the external-id row so the recency + // guard works on the next run. `external_url` is preserved. 
+ let external_url = plugin_metadata.external_url.clone(); + if let Err(e) = SeriesExternalIdRepository::upsert_for_plugin( + db, + planned.series_id, + &plugin.name, + &external_id, + Some(&external_url), + None, + ) + .await + { + warn!( + "Failed to refresh last_synced_at for series {} / plugin {}: {:#}", + planned.series_id, plugin.name, e + ); + } + + // 4. Emit a per-series metadata-updated event so the frontend + // invalidates its cache. Reuses the same event the manual + // apply path emits (see plugin_auto_match.rs). + if applied_count > 0 + && let Some(broadcaster) = event_broadcaster + { + let _ = broadcaster.emit(EntityChangeEvent::new( + EntityEvent::SeriesMetadataUpdated { + series_id: planned.series_id, + library_id, + plugin_id: plugin.id, + fields_updated: apply_result.applied_fields.clone(), + }, + None, + )); + } + + // 5. Best-effort plugin success bookkeeping. The plugin manager + // already records success on its own in the happy path, but we + // keep this for parity with plugin_auto_match. + if let Err(e) = PluginsRepository::record_success(db, plugin.id).await { + debug!("Plugin success record skipped: {:#}", e); + } + + Ok(applied_count) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::ScanningStrategy; + use crate::db::entities::plugins::PluginPermission; + use crate::db::repositories::{ + LibraryRepository, PluginsRepository, SeriesRepository, TaskRepository, + }; + use crate::db::test_helpers::setup_test_db; + use crate::services::metadata::refresh_config::MetadataRefreshConfig; + use crate::services::plugin::PluginManager; + use crate::services::plugin::protocol::PluginScope; + use crate::tasks::types::TaskType; + use std::env; + use std::sync::Once; + + static INIT_ENCRYPTION: Once = Once::new(); + + fn setup_test_encryption_key() { + INIT_ENCRYPTION.call_once(|| { + if env::var("CODEX_ENCRYPTION_KEY").is_err() { + // SAFETY: Tests share env. First-writer-wins is safe because + // the value is constant. 
+ unsafe { + env::set_var( + "CODEX_ENCRYPTION_KEY", + "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8=", + ); + } + } + }); + } + + #[test] + fn run_summary_serializes_zero_state() { + let json = RunSummary::default().into_json(0, vec![]); + assert_eq!(json["planned"], 0); + assert_eq!(json["succeeded"], 0); + assert_eq!(json["failed"], 0); + assert_eq!(json["skipped"]["no_external_id"], 0); + assert_eq!(json["skipped"]["recently_synced"], 0); + assert_eq!(json["skipped"]["provider_unavailable"], 0); + assert_eq!(json["fields_applied_total"], 0); + assert!(json["unresolved_providers"].as_array().unwrap().is_empty()); + } + + #[test] + fn run_summary_carries_unresolved_providers() { + let s = RunSummary { + succeeded: 3, + failed: 1, + skipped_no_external_id: 2, + skipped_recently_synced: 1, + skipped_provider_unavailable: 0, + fields_applied_total: 12, + }; + let json = s.into_json(7, vec!["plugin:gone".to_string()]); + assert_eq!(json["planned"], 7); + assert_eq!(json["succeeded"], 3); + assert_eq!(json["failed"], 1); + assert_eq!(json["fields_applied_total"], 12); + assert_eq!(json["skipped"]["no_external_id"], 2); + assert_eq!(json["skipped"]["recently_synced"], 1); + assert_eq!(json["unresolved_providers"][0], "plugin:gone"); + } + + /// Build a worker-style task row from a `TaskType`. 
+ async fn enqueue_and_load(db: &DatabaseConnection, task_type: TaskType) -> tasks::Model { + let id = TaskRepository::enqueue(db, task_type, None).await.unwrap(); + TaskRepository::get_by_id(db, id).await.unwrap().unwrap() + } + + fn make_handler(db: &DatabaseConnection) -> RefreshLibraryMetadataHandler { + let pm = Arc::new(PluginManager::with_defaults(Arc::new(db.clone()))); + RefreshLibraryMetadataHandler::new(pm) + } + + #[tokio::test] + async fn handler_short_circuits_when_no_providers_configured() { + let db = setup_test_db().await; + + let library = LibraryRepository::create( + &db, + "lib-empty", + "/tmp/lib-empty", + ScanningStrategy::Default, + ) + .await + .unwrap(); + + // Save an enabled config but with empty providers list. + let cfg = MetadataRefreshConfig { + enabled: true, + providers: vec![], + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library.id, &cfg) + .await + .unwrap(); + + let task = enqueue_and_load( + &db, + TaskType::RefreshLibraryMetadata { + library_id: library.id, + }, + ) + .await; + + let handler = make_handler(&db); + let result = handler.handle(&task, &db, None).await.unwrap(); + + assert!(result.success); + let data = result.data.expect("data should be populated"); + assert_eq!(data["planned"], 0); + assert_eq!(data["succeeded"], 0); + assert_eq!(data["failed"], 0); + } + + #[tokio::test] + async fn handler_reports_unresolved_provider() { + let db = setup_test_db().await; + let library = LibraryRepository::create( + &db, + "lib-unresolved", + "/tmp/lib-unresolved", + ScanningStrategy::Default, + ) + .await + .unwrap(); + + // Provider references a plugin that doesn't exist. 
+ let cfg = MetadataRefreshConfig { + enabled: true, + providers: vec!["plugin:does-not-exist".to_string()], + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library.id, &cfg) + .await + .unwrap(); + + let task = enqueue_and_load( + &db, + TaskType::RefreshLibraryMetadata { + library_id: library.id, + }, + ) + .await; + let handler = make_handler(&db); + let result = handler.handle(&task, &db, None).await.unwrap(); + + assert!(result.success); + let data = result.data.unwrap(); + assert_eq!(data["planned"], 0); + assert_eq!(data["unresolved_providers"][0], "plugin:does-not-exist"); + } + + #[tokio::test] + async fn handler_counts_no_external_id_skips_in_strict_mode() { + setup_test_encryption_key(); + let db = setup_test_db().await; + + let library = LibraryRepository::create( + &db, + "lib-strict", + "/tmp/lib-strict", + ScanningStrategy::Default, + ) + .await + .unwrap(); + // Two unmatched series. + let _s1 = SeriesRepository::create(&db, library.id, "Series A", None) + .await + .unwrap(); + let _s2 = SeriesRepository::create(&db, library.id, "Series B", None) + .await + .unwrap(); + // Enabled plugin, but no external IDs seeded. 
+ let _plugin = PluginsRepository::create( + &db, + "mangabaka", + "MangaBaka", + None, + "system", + "node", + vec!["dist/index.js".to_string()], + vec![], + None, + vec![PluginPermission::MetadataWriteSummary], + vec![PluginScope::SeriesDetail], + vec![], + None, + "env", + None, + true, + None, + None, + ) + .await + .unwrap(); + + let cfg = MetadataRefreshConfig { + enabled: true, + providers: vec!["plugin:mangabaka".to_string()], + existing_source_ids_only: true, + skip_recently_synced_within_s: 0, + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library.id, &cfg) + .await + .unwrap(); + + let task = enqueue_and_load( + &db, + TaskType::RefreshLibraryMetadata { + library_id: library.id, + }, + ) + .await; + let handler = make_handler(&db); + let result = handler.handle(&task, &db, None).await.unwrap(); + + assert!(result.success); + let data = result.data.unwrap(); + // Both series skipped at planning time. + assert_eq!(data["planned"], 0); + assert_eq!(data["skipped"]["no_external_id"], 2); + assert_eq!(data["succeeded"], 0); + assert_eq!(data["failed"], 0); + } + + #[tokio::test] + async fn handler_errors_when_library_missing() { + let db = setup_test_db().await; + let task_type = TaskType::RefreshLibraryMetadata { + library_id: uuid::Uuid::new_v4(), + }; + + // Build a synthetic task model directly (no DB row required since + // the handler reads `task.library_id` and goes to the repository). 
+ let now = chrono::Utc::now(); + let task = tasks::Model { + id: uuid::Uuid::new_v4(), + task_type: task_type.type_string().to_string(), + library_id: task_type.library_id(), + series_id: None, + book_id: None, + params: None, + status: "processing".to_string(), + priority: 0, + locked_by: None, + locked_until: None, + attempts: 1, + max_attempts: 3, + last_error: None, + reschedule_count: 0, + max_reschedules: 0, + result: None, + scheduled_for: now, + created_at: now, + started_at: Some(now), + completed_at: None, + }; + + let handler = make_handler(&db); + let err = handler.handle(&task, &db, None).await.unwrap_err(); + assert!(err.to_string().contains("Library not found")); + } +} diff --git a/src/tasks/types.rs b/src/tasks/types.rs index 6dc2c497..d748f944 100644 --- a/src/tasks/types.rs +++ b/src/tasks/types.rs @@ -47,6 +47,16 @@ pub enum TaskType { source: String, // "comicvine", "openlibrary", etc. }, + /// Scheduled per-library metadata refresh. + /// + /// Walks the library's series, applies the per-library + /// `metadata_refresh_config` (field groups, providers, matching strategy), + /// and refreshes metadata via the existing `MetadataApplier`. + RefreshLibraryMetadata { + #[serde(rename = "libraryId")] + library_id: Uuid, + }, + /// Generate thumbnails for books in a scope (library, series, specific books, or all) /// This is a fan-out task that enqueues individual GenerateThumbnail tasks GenerateThumbnails { @@ -233,6 +243,7 @@ impl TaskType { // Metadata TaskType::FindDuplicates => 400, TaskType::RefreshMetadata { .. } => 390, + TaskType::RefreshLibraryMetadata { .. } => 385, TaskType::PluginAutoMatch { .. } => 380, // Export TaskType::ExportSeries { .. } => 450, @@ -258,6 +269,7 @@ impl TaskType { TaskType::AnalyzeSeries { .. } => "analyze_series", TaskType::PurgeDeleted { .. } => "purge_deleted", TaskType::RefreshMetadata { .. } => "refresh_metadata", + TaskType::RefreshLibraryMetadata { .. 
} => "refresh_library_metadata", TaskType::GenerateThumbnails { .. } => "generate_thumbnails", TaskType::GenerateThumbnail { .. } => "generate_thumbnail", TaskType::GenerateSeriesThumbnail { .. } => "generate_series_thumbnail", @@ -288,6 +300,7 @@ impl TaskType { match self { TaskType::ScanLibrary { library_id, .. } => Some(*library_id), TaskType::PurgeDeleted { library_id } => Some(*library_id), + TaskType::RefreshLibraryMetadata { library_id } => Some(*library_id), TaskType::GenerateThumbnails { library_id, .. } => *library_id, TaskType::GenerateSeriesThumbnails { library_id, .. } => *library_id, TaskType::ReprocessSeriesTitles { library_id, .. } => *library_id, @@ -1006,6 +1019,42 @@ mod tests { assert!(params["series_ids"].is_null()); } + #[test] + fn test_refresh_library_metadata_extraction() { + let library_id = Uuid::new_v4(); + let task = TaskType::RefreshLibraryMetadata { library_id }; + + assert_eq!(task.type_string(), "refresh_library_metadata"); + assert_eq!(task.library_id(), Some(library_id)); + assert_eq!(task.series_id(), None); + assert_eq!(task.book_id(), None); + assert_eq!(task.default_priority(), 385); + + let (type_str, lib_id, series_id, book_id, params) = task.extract_fields(); + assert_eq!(type_str, "refresh_library_metadata"); + assert_eq!(lib_id, Some(library_id)); + assert!(series_id.is_none()); + assert!(book_id.is_none()); + // No extra params beyond library_id (carried via the FK column) + assert!(params.is_none()); + } + + #[test] + fn test_refresh_library_metadata_serialization() { + let library_id = Uuid::new_v4(); + let task = TaskType::RefreshLibraryMetadata { library_id }; + + let json = serde_json::to_string(&task).unwrap(); + assert!(json.contains("refresh_library_metadata")); + assert!(json.contains(&library_id.to_string())); + // libraryId is the camelCase rename used by the rest of the enum + assert!(json.contains("libraryId")); + + let deserialized: TaskType = serde_json::from_str(&json).unwrap(); + 
assert_eq!(deserialized.type_string(), "refresh_library_metadata"); + assert_eq!(deserialized.library_id(), Some(library_id)); + } + #[test] fn test_default_priority_values() { let library_id = Uuid::new_v4(); diff --git a/src/tasks/worker.rs b/src/tasks/worker.rs index dbc2583f..cfb09f1c 100644 --- a/src/tasks/worker.rs +++ b/src/tasks/worker.rs @@ -31,8 +31,8 @@ use crate::tasks::handlers::{ CleanupSeriesFilesHandler, ExportSeriesHandler, FindDuplicatesHandler, GenerateSeriesThumbnailHandler, GenerateSeriesThumbnailsHandler, GenerateThumbnailHandler, GenerateThumbnailsHandler, PluginAutoMatchHandler, PurgeDeletedHandler, - RenumberSeriesBatchHandler, RenumberSeriesHandler, ReprocessSeriesTitleHandler, - ReprocessSeriesTitlesHandler, ScanLibraryHandler, TaskHandler, + RefreshLibraryMetadataHandler, RenumberSeriesBatchHandler, RenumberSeriesHandler, + ReprocessSeriesTitleHandler, ReprocessSeriesTitlesHandler, ScanLibraryHandler, TaskHandler, UserPluginRecommendationDismissHandler, UserPluginRecommendationsHandler, UserPluginSyncHandler, }; @@ -232,6 +232,18 @@ impl TaskWorker { } self.handlers .insert("plugin_auto_match".to_string(), Arc::new(handler)); + // Register the scheduled per-library metadata refresh handler. + // It depends on PluginManager (to call get_series_metadata) and + // optionally ThumbnailService (for cover-field updates via the + // shared MetadataApplier). 
+ let mut refresh_handler = RefreshLibraryMetadataHandler::new(plugin_manager.clone()); + if let Some(ref thumbnail_service) = self.thumbnail_service { + refresh_handler = refresh_handler.with_thumbnail_service(thumbnail_service.clone()); + } + self.handlers.insert( + "refresh_library_metadata".to_string(), + Arc::new(refresh_handler), + ); // Register user plugin sync handler (with settings service for configurable timeout) let mut sync_handler = UserPluginSyncHandler::new(plugin_manager.clone()); if let Some(ref settings_service) = self.settings_service { From 822bac235626d21607fd85f0c16fde3236a64843 Mon Sep 17 00:00:00 2001 From: Sylvain Cau Date: Sun, 3 May 2026 11:42:35 -0700 Subject: [PATCH 03/12] feat(metadata): add field-group resolver and dry-run mode to series metadata apply Introduce the user-facing field-group taxonomy and the dry-run plumbing the scheduled per-library refresh needs to surface "what would change?" to admins before they commit to a schedule. - New `field_groups` module with a closed `FieldGroup` enum (Identifiers, Descriptive, Status, Counts, Ratings, Cover, Tags, Genres, AgeRating, Classification, Publisher, ExternalRefs) and a `fields_for_groups` resolver that expands group names to the camelCase fields the applier recognises. A self-test pins every returned field to a real `should_apply_field` call site so the mapping cannot drift silently. - `MetadataApplier::apply` gains `ApplyOptions::dry_run` and a `MetadataApplyResult::dry_run_report` carrying `FieldChange` entries. Every write site is gated: dry-run records the prospective change instead of touching the DB. Locks and permission denials still flow through `skipped_fields` via the same code path, so the report stays focused on writes that would actually happen. - HTTP series apply endpoint accepts `dryRun` in the request body and returns `dryRunReport` in the response (omitted on real applies). External-ID upsert is skipped on dry-run so a preview never mutates state. 
Book apply explicitly rejects `dryRun=true` with 400 because `BookMetadataApplier` does not honour the flag yet. - `refresh_planner::fields_filter_from_config` now delegates to the field-group resolver, so a config like `{ field_groups: ["ratings","status"] }` correctly produces `{"rating","externalRatings","status","year"}` for the applier. Tests cover the field-group mapping, dry-run write gating with row-state snapshots, the new apply API contract, and the planner's group expansion. Existing apply call sites are updated to set `dry_run: false` so the default behaviour is unchanged. --- docs/api/openapi.json | 67 +++ src/api/docs.rs | 2 + src/api/routes/v1/dto/plugins.rs | 38 ++ src/api/routes/v1/handlers/plugin_actions.rs | 67 ++- src/services/metadata/apply.rs | 508 +++++++++++++----- src/services/metadata/field_groups.rs | 364 +++++++++++++ src/services/metadata/mod.rs | 3 + src/services/metadata/refresh_planner.rs | 57 +- src/tasks/handlers/plugin_auto_match.rs | 1 + .../handlers/refresh_library_metadata.rs | 1 + tests/api/plugins.rs | 31 ++ tests/services.rs | 1 + tests/services/apply_dry_run.rs | 422 +++++++++++++++ tests/services/metadata_apply.rs | 1 + web/openapi.json | 67 +++ web/src/types/api.generated.ts | 34 ++ 16 files changed, 1496 insertions(+), 168 deletions(-) create mode 100644 src/services/metadata/field_groups.rs create mode 100644 tests/services/apply_dry_run.rs diff --git a/docs/api/openapi.json b/docs/api/openapi.json index b32c8275..38c2959b 100644 --- a/docs/api/openapi.json +++ b/docs/api/openapi.json @@ -21692,6 +21692,21 @@ } } }, + "DryRunReportDto": { + "type": "object", + "description": "Dry-run preview attached to [`MetadataApplyResponse`] when the request\nset `dryRun = true`. 
Absent on real applies.", + "required": [ + "changes" + ], + "properties": { + "changes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FieldChangeDto" + } + } + } + }, "DuplicateGroup": { "type": "object", "description": "A group of duplicate books", @@ -22834,6 +22849,23 @@ "not_provided" ] }, + "FieldChangeDto": { + "type": "object", + "description": "One would-be field change recorded during a dry-run apply.\n\nMirrors `services::metadata::apply::FieldChange`, kept as a distinct DTO\nto keep the wire-format frozen even if internal types evolve.", + "required": [ + "field", + "after" + ], + "properties": { + "after": {}, + "before": { + "description": "Current value, where cheaply available. `null` for fields backed by\njoined tables (genres, tags, alternate titles, ratings, etc.)." + }, + "field": { + "type": "string" + } + } + }, "FieldOperator": { "oneOf": [ { @@ -26091,6 +26123,10 @@ "externalId" ], "properties": { + "dryRun": { + "type": "boolean", + "description": "When `true`, the call simulates the apply without writing to the\ndatabase. Returns the same `appliedFields`/`skippedFields` plus an\nextra `dryRunReport` showing every would-be change. Default `false`." + }, "externalId": { "type": "string", "description": "External ID from the plugin's search results" @@ -26129,6 +26165,17 @@ }, "description": "Fields that were applied" }, + "dryRunReport": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/DryRunReportDto", + "description": "Populated only when the request set `dryRun = true`. Each entry is a\nfield that *would* have been written." 
+ } + ] + }, "message": { "type": "string", "description": "Message" @@ -34363,6 +34410,26 @@ } } }, + { + "type": "object", + "description": "Scheduled per-library metadata refresh.\n\nWalks the library's series, applies the per-library\n`metadata_refresh_config` (field groups, providers, matching strategy),\nand refreshes metadata via the existing `MetadataApplier`.", + "required": [ + "libraryId", + "type" + ], + "properties": { + "libraryId": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "refresh_library_metadata" + ] + } + } + }, { "type": "object", "description": "Generate thumbnails for books in a scope (library, series, specific books, or all)\nThis is a fan-out task that enqueues individual GenerateThumbnail tasks", diff --git a/src/api/docs.rs b/src/api/docs.rs index f5c8fe48..f7af0595 100644 --- a/src/api/docs.rs +++ b/src/api/docs.rs @@ -878,6 +878,8 @@ The following paths are exempt from rate limiting: v1::dto::PreviewSummary, v1::dto::MetadataApplyRequest, v1::dto::MetadataApplyResponse, + v1::dto::FieldChangeDto, + v1::dto::DryRunReportDto, v1::dto::SkippedField, v1::dto::MetadataAutoMatchRequest, v1::dto::MetadataAutoMatchResponse, diff --git a/src/api/routes/v1/dto/plugins.rs b/src/api/routes/v1/dto/plugins.rs index 367b5f6c..fda61594 100644 --- a/src/api/routes/v1/dto/plugins.rs +++ b/src/api/routes/v1/dto/plugins.rs @@ -1290,6 +1290,39 @@ pub struct MetadataApplyRequest { /// Optional list of fields to apply (default: all applicable fields) #[serde(skip_serializing_if = "Option::is_none")] pub fields: Option>, + + /// When `true`, the call simulates the apply without writing to the + /// database. Returns the same `appliedFields`/`skippedFields` plus an + /// extra `dryRunReport` showing every would-be change. Default `false`. + #[serde(default, skip_serializing_if = "is_false")] + pub dry_run: bool, +} + +/// One would-be field change recorded during a dry-run apply. 
+/// +/// Mirrors `services::metadata::apply::FieldChange`, kept as a distinct DTO +/// to keep the wire-format frozen even if internal types evolve. +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct FieldChangeDto { + pub field: String, + /// Current value, where cheaply available. `null` for fields backed by + /// joined tables (genres, tags, alternate titles, ratings, etc.). + #[serde(skip_serializing_if = "Option::is_none")] + pub before: Option, + pub after: serde_json::Value, +} + +/// Dry-run preview attached to [`MetadataApplyResponse`] when the request +/// set `dryRun = true`. Absent on real applies. +#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct DryRunReportDto { + pub changes: Vec, +} + +fn is_false(b: &bool) -> bool { + !*b } /// Response after applying metadata @@ -1307,6 +1340,11 @@ pub struct MetadataApplyResponse { /// Message pub message: String, + + /// Populated only when the request set `dryRun = true`. Each entry is a + /// field that *would* have been written. + #[serde(skip_serializing_if = "Option::is_none")] + pub dry_run_report: Option, } /// A field that was skipped during apply diff --git a/src/api/routes/v1/handlers/plugin_actions.rs b/src/api/routes/v1/handlers/plugin_actions.rs index ffb7f324..d59a63b6 100644 --- a/src/api/routes/v1/handlers/plugin_actions.rs +++ b/src/api/routes/v1/handlers/plugin_actions.rs @@ -11,13 +11,13 @@ //! 
- POST /api/v1/books/{id}/metadata/apply - Apply metadata for a book use super::super::dto::{ - EnqueueAutoMatchRequest, EnqueueAutoMatchResponse, EnqueueBulkAutoMatchRequest, - EnqueueLibraryAutoMatchRequest, ExecutePluginRequest, ExecutePluginResponse, FieldApplyStatus, - MetadataAction, MetadataApplyRequest, MetadataApplyResponse, MetadataAutoMatchRequest, - MetadataAutoMatchResponse, MetadataFieldPreview, MetadataPreviewRequest, - MetadataPreviewResponse, PluginActionDto, PluginActionRequest, PluginActionsResponse, - PluginSearchResponse, PluginSearchResultDto, PreviewSummary, SearchTitleResponse, SkippedField, - parse_scope, + DryRunReportDto, EnqueueAutoMatchRequest, EnqueueAutoMatchResponse, + EnqueueBulkAutoMatchRequest, EnqueueLibraryAutoMatchRequest, ExecutePluginRequest, + ExecutePluginResponse, FieldApplyStatus, FieldChangeDto, MetadataAction, MetadataApplyRequest, + MetadataApplyResponse, MetadataAutoMatchRequest, MetadataAutoMatchResponse, + MetadataFieldPreview, MetadataPreviewRequest, MetadataPreviewResponse, PluginActionDto, + PluginActionRequest, PluginActionsResponse, PluginSearchResponse, PluginSearchResultDto, + PreviewSummary, SearchTitleResponse, SkippedField, parse_scope, }; use crate::api::{AppState, error::ApiError, extractors::AuthContext, permissions::Permission}; use crate::db::entities::plugins::PluginPermission; @@ -1432,10 +1432,12 @@ pub async fn apply_series_metadata( .map_err(|e| ApiError::Internal(format!("Failed to get current metadata: {}", e)))?; // Build apply options + let dry_run = request.dry_run; let options = ApplyOptions { fields_filter: request.fields.map(|f| f.into_iter().collect()), thumbnail_service: Some(state.thumbnail_service.clone()), event_broadcaster: Some(state.event_broadcaster.clone()), + dry_run, }; // Apply metadata using the shared service @@ -1451,16 +1453,18 @@ pub async fn apply_series_metadata( .await .map_err(|e| ApiError::Internal(format!("Failed to apply metadata: {}", e)))?; - // Store/update 
external ID for future lookups (matches auto-match behavior) - if let Err(e) = SeriesExternalIdRepository::upsert_for_plugin( - &state.db, - series_id, - &plugin.name, - &plugin_metadata.external_id, - Some(&plugin_metadata.external_url), - None, // metadata_hash - could be computed if needed - ) - .await + // Store/update external ID for future lookups (matches auto-match + // behavior). Skipped on dry-run so a preview never mutates DB state. + if !dry_run + && let Err(e) = SeriesExternalIdRepository::upsert_for_plugin( + &state.db, + series_id, + &plugin.name, + &plugin_metadata.external_id, + Some(&plugin_metadata.external_url), + None, // metadata_hash - could be computed if needed + ) + .await { tracing::warn!( "Failed to store external ID for series {}: {}", @@ -1480,8 +1484,25 @@ pub async fn apply_series_metadata( }) .collect(); + let dry_run_report = result.dry_run_report.map(|r| DryRunReportDto { + changes: r + .changes + .into_iter() + .map(|c| FieldChangeDto { + field: c.field, + before: c.before, + after: c.after, + }) + .collect(), + }); + let message = if result.applied_fields.is_empty() { "No fields were applied".to_string() + } else if dry_run { + format!( + "Dry run: {} field(s) would be applied", + result.applied_fields.len() + ) } else { format!("Applied {} field(s)", result.applied_fields.len()) }; @@ -1491,6 +1512,7 @@ pub async fn apply_series_metadata( applied_fields: result.applied_fields, skipped_fields, message, + dry_run_report, })) } @@ -1637,6 +1659,7 @@ pub async fn auto_match_series_metadata( fields_filter: None, // Apply all fields thumbnail_service: Some(state.thumbnail_service.clone()), event_broadcaster: Some(state.event_broadcaster.clone()), + dry_run: false, }; // Apply metadata using the shared service @@ -2279,6 +2302,14 @@ pub async fn apply_book_metadata( // Check permission to edit book metadata auth.require_permission(&Permission::BooksWrite)?; + // Dry-run is currently only supported on the series apply path. 
Reject + // it explicitly here so the user doesn't get a silent real-apply. + if request.dry_run { + return Err(ApiError::BadRequest( + "dryRun is not supported on book metadata apply".to_string(), + )); + } + // Get the book (verify it exists) let book = BookRepository::get_by_id(&state.db, book_id) .await @@ -2374,6 +2405,8 @@ pub async fn apply_book_metadata( applied_fields: result.applied_fields, skipped_fields, message, + // Book apply doesn't support dry-run yet; series-only for now. + dry_run_report: None, })) } diff --git a/src/services/metadata/apply.rs b/src/services/metadata/apply.rs index b9b9be7a..28f5c7bb 100644 --- a/src/services/metadata/apply.rs +++ b/src/services/metadata/apply.rs @@ -33,13 +33,43 @@ pub struct SkippedField { pub reason: String, } +/// One field's would-be change as recorded by a dry-run apply. +/// +/// `before` is the in-memory current value pulled from `series_metadata` +/// (or `None` when the source-of-truth lives in a joined table — e.g. +/// genres, tags, alternate titles, ratings, external links/ids, cover). +/// `after` is the value the applier would have written. Both are JSON +/// for uniform transport over the wire. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct FieldChange { + pub field: String, + pub before: Option, + pub after: serde_json::Value, +} + +/// Per-series report of what a dry-run apply would have done. Returned +/// alongside [`MetadataApplyResult`] when `ApplyOptions::dry_run = true`. +/// +/// Skip reasons (locked field, missing permission, etc.) are surfaced via +/// [`MetadataApplyResult::skipped_fields`] — same code path as a real +/// apply, so the report stays focused on prospective writes. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct DryRunReport { + pub changes: Vec, +} + /// Result of applying metadata to a series. 
-#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct MetadataApplyResult { /// Fields that were successfully applied. pub applied_fields: Vec, /// Fields that were skipped (with reasons). pub skipped_fields: Vec, + /// Populated only when `ApplyOptions::dry_run = true`. `None` for real + /// applies. Each entry is a field that *would* have been written. + pub dry_run_report: Option, } /// Options for controlling metadata application behavior. @@ -51,6 +81,10 @@ pub struct ApplyOptions { pub thumbnail_service: Option>, /// Event broadcaster for emitting real-time events. If None, events won't be emitted. pub event_broadcaster: Option>, + /// When true, all DB writes are skipped and a [`DryRunReport`] is + /// returned instead. Locks and permission checks still run — same code + /// path, just gated writes. + pub dry_run: bool, } /// Service for applying plugin metadata to series. @@ -74,6 +108,13 @@ impl MetadataApplier { ) -> Result { let mut applied_fields = Vec::new(); let mut skipped_fields = Vec::new(); + // Collected when `options.dry_run = true`, kept `None` otherwise so a + // real apply still returns `dry_run_report = None`. 
+ let mut dry_run_changes: Option> = if options.dry_run { + Some(Vec::new()) + } else { + None + }; // Helper to check if a field should be applied based on the filter let should_apply_field = |field: &str| -> bool { @@ -118,14 +159,33 @@ impl MetadataApplier { } else { Some(title.clone()) }; - SeriesMetadataRepository::update_title( - db, - series_id, - title.clone(), - title_sort, - ) - .await - .context("Failed to update title")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "title".to_string(), + before: Some(serde_json::json!( + current_metadata.map(|m| m.title.clone()) + )), + after: serde_json::json!(title), + }); + if !title_sort_locked && should_apply_field("titleSort") { + changes.push(FieldChange { + field: "titleSort".to_string(), + before: Some(serde_json::json!( + current_metadata.and_then(|m| m.title_sort.clone()) + )), + after: serde_json::json!(title_sort), + }); + } + } else { + SeriesMetadataRepository::update_title( + db, + series_id, + title.clone(), + title_sort, + ) + .await + .context("Failed to update title")?; + } applied_fields.push("title".to_string()); if !title_sort_locked && should_apply_field("titleSort") { applied_fields.push("titleSort".to_string()); @@ -146,35 +206,50 @@ impl MetadataApplier { PluginPermission::MetadataWriteTitle, ) { Ok(_) => { - // Delete existing alternate titles - AlternateTitleRepository::delete_all_for_series(db, series_id) - .await - .context("Failed to delete old alternate titles")?; - - // Add new alternate titles with unique labels - // Track label counts to make duplicates unique (e.g., "en", "en-2", "en-3") - let mut label_counts: HashMap = HashMap::new(); - - for alt_title in &metadata.alternate_titles { - // Use language or title_type as base label, defaulting to "alternate" - let base_label = alt_title - .language - .clone() - .or_else(|| alt_title.title_type.clone()) - .unwrap_or_else(|| "alternate".to_string()); - - // Make label unique by appending 
count suffix for duplicates - let count = label_counts.entry(base_label.clone()).or_insert(0); - *count += 1; - let label = if *count == 1 { - base_label - } else { - format!("{}-{}", base_label, count) - }; - - AlternateTitleRepository::create(db, series_id, &label, &alt_title.title) + if let Some(changes) = dry_run_changes.as_mut() { + // Joined-table data: `before` not cheaply available + // here. Surface the new payload as `after` only. + changes.push(FieldChange { + field: "alternateTitles".to_string(), + before: None, + after: serde_json::json!(metadata.alternate_titles), + }); + } else { + // Delete existing alternate titles + AlternateTitleRepository::delete_all_for_series(db, series_id) + .await + .context("Failed to delete old alternate titles")?; + + // Add new alternate titles with unique labels + // Track label counts to make duplicates unique (e.g., "en", "en-2", "en-3") + let mut label_counts: HashMap = HashMap::new(); + + for alt_title in &metadata.alternate_titles { + // Use language or title_type as base label, defaulting to "alternate" + let base_label = alt_title + .language + .clone() + .or_else(|| alt_title.title_type.clone()) + .unwrap_or_else(|| "alternate".to_string()); + + // Make label unique by appending count suffix for duplicates + let count = label_counts.entry(base_label.clone()).or_insert(0); + *count += 1; + let label = if *count == 1 { + base_label + } else { + format!("{}-{}", base_label, count) + }; + + AlternateTitleRepository::create( + db, + series_id, + &label, + &alt_title.title, + ) .await .context("Failed to create alternate title")?; + } } applied_fields.push("alternateTitles".to_string()); } @@ -189,9 +264,23 @@ impl MetadataApplier { let is_locked = current_metadata.map(|m| m.summary_lock).unwrap_or(false); match check_field("summary", is_locked, PluginPermission::MetadataWriteSummary) { Ok(_) => { - SeriesMetadataRepository::update_summary(db, series_id, Some(summary.clone())) + if let Some(changes) = 
dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "summary".to_string(), + before: Some(serde_json::json!( + current_metadata.and_then(|m| m.summary.clone()) + )), + after: serde_json::json!(summary), + }); + } else { + SeriesMetadataRepository::update_summary( + db, + series_id, + Some(summary.clone()), + ) .await .context("Failed to update summary")?; + } applied_fields.push("summary".to_string()); } Err(skip) => skipped_fields.push(skip), @@ -205,9 +294,17 @@ impl MetadataApplier { let is_locked = current_metadata.map(|m| m.year_lock).unwrap_or(false); match check_field("year", is_locked, PluginPermission::MetadataWriteYear) { Ok(_) => { - SeriesMetadataRepository::update_year(db, series_id, Some(year)) - .await - .context("Failed to update year")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "year".to_string(), + before: Some(serde_json::json!(current_metadata.and_then(|m| m.year))), + after: serde_json::json!(year), + }); + } else { + SeriesMetadataRepository::update_year(db, series_id, Some(year)) + .await + .context("Failed to update year")?; + } applied_fields.push("year".to_string()); } Err(skip) => skipped_fields.push(skip), @@ -228,13 +325,23 @@ impl MetadataApplier { SeriesStatus::Abandoned => "abandoned", SeriesStatus::Unknown => "unknown", }; - SeriesMetadataRepository::update_status( - db, - series_id, - Some(status_str.to_string()), - ) - .await - .context("Failed to update status")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "status".to_string(), + before: Some(serde_json::json!( + current_metadata.and_then(|m| m.status.clone()) + )), + after: serde_json::json!(status_str), + }); + } else { + SeriesMetadataRepository::update_status( + db, + series_id, + Some(status_str.to_string()), + ) + .await + .context("Failed to update status")?; + } applied_fields.push("status".to_string()); } Err(skip) => skipped_fields.push(skip), @@ -252,14 +359,24 
@@ impl MetadataApplier { PluginPermission::MetadataWritePublisher, ) { Ok(_) => { - SeriesMetadataRepository::update_publisher( - db, - series_id, - Some(publisher.clone()), - None, - ) - .await - .context("Failed to update publisher")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "publisher".to_string(), + before: Some(serde_json::json!( + current_metadata.and_then(|m| m.publisher.clone()) + )), + after: serde_json::json!(publisher), + }); + } else { + SeriesMetadataRepository::update_publisher( + db, + series_id, + Some(publisher.clone()), + None, + ) + .await + .context("Failed to update publisher")?; + } applied_fields.push("publisher".to_string()); } Err(skip) => skipped_fields.push(skip), @@ -277,9 +394,23 @@ impl MetadataApplier { PluginPermission::MetadataWriteAgeRating, ) { Ok(_) => { - SeriesMetadataRepository::update_age_rating(db, series_id, Some(age_rating)) + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "ageRating".to_string(), + before: Some(serde_json::json!( + current_metadata.and_then(|m| m.age_rating) + )), + after: serde_json::json!(age_rating), + }); + } else { + SeriesMetadataRepository::update_age_rating( + db, + series_id, + Some(age_rating), + ) .await .context("Failed to update age rating")?; + } applied_fields.push("ageRating".to_string()); } Err(skip) => skipped_fields.push(skip), @@ -297,13 +428,23 @@ impl MetadataApplier { PluginPermission::MetadataWriteLanguage, ) { Ok(_) => { - SeriesMetadataRepository::update_language( - db, - series_id, - Some(language.clone()), - ) - .await - .context("Failed to update language")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "language".to_string(), + before: Some(serde_json::json!( + current_metadata.and_then(|m| m.language.clone()) + )), + after: serde_json::json!(language), + }); + } else { + SeriesMetadataRepository::update_language( + db, + series_id, + 
Some(language.clone()), + ) + .await + .context("Failed to update language")?; + } applied_fields.push("language".to_string()); } Err(skip) => skipped_fields.push(skip), @@ -323,13 +464,23 @@ impl MetadataApplier { PluginPermission::MetadataWriteReadingDirection, ) { Ok(_) => { - SeriesMetadataRepository::update_reading_direction( - db, - series_id, - Some(reading_direction.clone()), - ) - .await - .context("Failed to update reading direction")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "readingDirection".to_string(), + before: Some(serde_json::json!( + current_metadata.and_then(|m| m.reading_direction.clone()) + )), + after: serde_json::json!(reading_direction), + }); + } else { + SeriesMetadataRepository::update_reading_direction( + db, + series_id, + Some(reading_direction.clone()), + ) + .await + .context("Failed to update reading direction")?; + } applied_fields.push("readingDirection".to_string()); } Err(skip) => skipped_fields.push(skip), @@ -349,13 +500,23 @@ impl MetadataApplier { PluginPermission::MetadataWriteTotalVolumeCount, ) { Ok(_) => { - SeriesMetadataRepository::update_total_volume_count( - db, - series_id, - Some(total_volume_count), - ) - .await - .context("Failed to update total volume count")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "totalVolumeCount".to_string(), + before: Some(serde_json::json!( + current_metadata.and_then(|m| m.total_volume_count) + )), + after: serde_json::json!(total_volume_count), + }); + } else { + SeriesMetadataRepository::update_total_volume_count( + db, + series_id, + Some(total_volume_count), + ) + .await + .context("Failed to update total volume count")?; + } applied_fields.push("totalVolumeCount".to_string()); } Err(skip) => skipped_fields.push(skip), @@ -375,13 +536,23 @@ impl MetadataApplier { PluginPermission::MetadataWriteTotalChapterCount, ) { Ok(_) => { - SeriesMetadataRepository::update_total_chapter_count( - 
db, - series_id, - Some(total_chapter_count), - ) - .await - .context("Failed to update total chapter count")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "totalChapterCount".to_string(), + before: Some(serde_json::json!( + current_metadata.and_then(|m| m.total_chapter_count) + )), + after: serde_json::json!(total_chapter_count), + }); + } else { + SeriesMetadataRepository::update_total_chapter_count( + db, + series_id, + Some(total_chapter_count), + ) + .await + .context("Failed to update total chapter count")?; + } applied_fields.push("totalChapterCount".to_string()); } Err(skip) => skipped_fields.push(skip), @@ -402,9 +573,17 @@ impl MetadataApplier { reason: "Plugin does not have permission".to_string(), }); } else { - GenreRepository::set_genres_for_series(db, series_id, metadata.genres.clone()) - .await - .context("Failed to set genres")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "genres".to_string(), + before: None, + after: serde_json::json!(metadata.genres), + }); + } else { + GenreRepository::set_genres_for_series(db, series_id, metadata.genres.clone()) + .await + .context("Failed to set genres")?; + } applied_fields.push("genres".to_string()); } } @@ -423,9 +602,17 @@ impl MetadataApplier { reason: "Plugin does not have permission".to_string(), }); } else { - TagRepository::set_tags_for_series(db, series_id, metadata.tags.clone()) - .await - .context("Failed to set tags")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "tags".to_string(), + before: None, + after: serde_json::json!(metadata.tags), + }); + } else { + TagRepository::set_tags_for_series(db, series_id, metadata.tags.clone()) + .await + .context("Failed to set tags")?; + } applied_fields.push("tags".to_string()); } } @@ -439,13 +626,23 @@ impl MetadataApplier { Ok(_) => { let authors_json = serde_json::to_string(&metadata.authors) .unwrap_or_else(|_| 
"[]".to_string()); - SeriesMetadataRepository::update_authors_json( - db, - series_id, - Some(authors_json), - ) - .await - .context("Failed to update authors")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "authors".to_string(), + before: Some(serde_json::json!( + current_metadata.and_then(|m| m.authors_json.clone()) + )), + after: serde_json::json!(metadata.authors), + }); + } else { + SeriesMetadataRepository::update_authors_json( + db, + series_id, + Some(authors_json), + ) + .await + .context("Failed to update authors")?; + } applied_fields.push("authors".to_string()); } Err(skip) => skipped_fields.push(skip), @@ -460,10 +657,18 @@ impl MetadataApplier { reason: "Plugin does not have permission".to_string(), }); } else { - for link in &metadata.external_links { - ExternalLinkRepository::upsert(db, series_id, &link.label, &link.url, None) - .await - .context("Failed to upsert external link")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "externalLinks".to_string(), + before: None, + after: serde_json::json!(metadata.external_links), + }); + } else { + for link in &metadata.external_links { + ExternalLinkRepository::upsert(db, series_id, &link.label, &link.url, None) + .await + .context("Failed to upsert external link")?; + } } applied_fields.push("externalLinks".to_string()); } @@ -477,17 +682,25 @@ impl MetadataApplier { reason: "Plugin does not have permission".to_string(), }); } else { - for ext_id in &metadata.external_ids { - SeriesExternalIdRepository::upsert( - db, - series_id, - &ext_id.source, - &ext_id.external_id, - None, // external_url - not provided in cross-references - None, // metadata_hash - not applicable for cross-references - ) - .await - .context("Failed to upsert external ID")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "externalIds".to_string(), + before: None, + after: 
serde_json::json!(metadata.external_ids), + }); + } else { + for ext_id in &metadata.external_ids { + SeriesExternalIdRepository::upsert( + db, + series_id, + &ext_id.source, + &ext_id.external_id, + None, // external_url - not provided in cross-references + None, // metadata_hash - not applicable for cross-references + ) + .await + .context("Failed to upsert external ID")?; + } } applied_fields.push("externalIds".to_string()); } @@ -503,17 +716,25 @@ impl MetadataApplier { reason: "Plugin does not have permission".to_string(), }); } else { - let score = - Decimal::from_f64_retain(rating.score).unwrap_or_else(|| Decimal::new(0, 0)); - ExternalRatingRepository::upsert( - db, - series_id, - &rating.source, - score, - rating.vote_count, - ) - .await - .context("Failed to upsert external rating")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "rating".to_string(), + before: None, + after: serde_json::json!(rating), + }); + } else { + let score = Decimal::from_f64_retain(rating.score) + .unwrap_or_else(|| Decimal::new(0, 0)); + ExternalRatingRepository::upsert( + db, + series_id, + &rating.source, + score, + rating.vote_count, + ) + .await + .context("Failed to upsert external rating")?; + } applied_fields.push("rating".to_string()); } } @@ -528,18 +749,26 @@ impl MetadataApplier { }); } } else { - for rating in &metadata.external_ratings { - let score = Decimal::from_f64_retain(rating.score) - .unwrap_or_else(|| Decimal::new(0, 0)); - ExternalRatingRepository::upsert( - db, - series_id, - &rating.source, - score, - rating.vote_count, - ) - .await - .context("Failed to upsert external rating")?; + if let Some(changes) = dry_run_changes.as_mut() { + changes.push(FieldChange { + field: "externalRatings".to_string(), + before: None, + after: serde_json::json!(metadata.external_ratings), + }); + } else { + for rating in &metadata.external_ratings { + let score = Decimal::from_f64_retain(rating.score) + .unwrap_or_else(|| 
Decimal::new(0, 0)); + ExternalRatingRepository::upsert( + db, + series_id, + &rating.source, + score, + rating.vote_count, + ) + .await + .context("Failed to upsert external rating")?; + } } if !applied_fields.contains(&"rating".to_string()) { applied_fields.push("externalRatings".to_string()); @@ -557,6 +786,14 @@ impl MetadataApplier { field: "coverUrl".to_string(), reason: "Plugin does not have permission".to_string(), }); + } else if let Some(changes) = dry_run_changes.as_mut() { + // Dry run: don't download, just record the new URL. + changes.push(FieldChange { + field: "coverUrl".to_string(), + before: None, + after: serde_json::json!(cover_url), + }); + applied_fields.push("coverUrl".to_string()); } else if let Some(thumbnail_service) = &options.thumbnail_service { match CoverService::download_and_apply( db, @@ -592,6 +829,7 @@ impl MetadataApplier { Ok(MetadataApplyResult { applied_fields, skipped_fields, + dry_run_report: dry_run_changes.map(|changes| DryRunReport { changes }), }) } } diff --git a/src/services/metadata/field_groups.rs b/src/services/metadata/field_groups.rs new file mode 100644 index 00000000..dc739d9b --- /dev/null +++ b/src/services/metadata/field_groups.rs @@ -0,0 +1,364 @@ +//! User-facing field-group taxonomy for the scheduled metadata refresh. +//! +//! The scheduled refresh exposes a small set of named groups (Ratings, +//! Status, Counts, etc.) instead of asking the user to pick from the ~20 +//! camelCase field names that [`crate::services::metadata::MetadataApplier`] +//! understands. This module is the authoritative mapping between the two +//! vocabularies. +//! +//! ## Vocabulary +//! +//! - **Field name**: a camelCase string the applier recognises in +//! `ApplyOptions::fields_filter` (e.g. `"rating"`, `"totalVolumeCount"`). +//! Source of truth: `should_apply_field(...)` call sites in +//! `services/metadata/apply.rs`. +//! - **Group name**: a snake_case string stored in +//! `MetadataRefreshConfig::field_groups` (e.g. 
`"ratings"`, `"counts"`). +//! This is what the UI persists. +//! +//! ## Responsibilities +//! +//! - [`FieldGroup`] enum — closed set of groups, parseable from snake_case. +//! - [`fields_for_group`] — single group → its field set. +//! - [`fields_for_groups`] — slice of groups → deduplicated field set +//! (returns an `Option>` mirroring `ApplyOptions`). +//! - [`group_for_field`] — reverse lookup, used by the UI for display. + +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; +use std::str::FromStr; + +/// Closed taxonomy of refresh field groups. +/// +/// Stored values are snake_case strings — see [`FieldGroup::as_str`] / +/// [`FieldGroup::from_str`]. The mapping to concrete field names is +/// intentionally narrow and conservative: each group covers fields that +/// "move together" semantically, so refreshing a group rarely surprises +/// the user. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum FieldGroup { + /// Title, title sort, alternate titles. + Identifiers, + /// Summary and structured author info. + Descriptive, + /// Publication status and publication year. + Status, + /// Total volume count and total chapter count. + Counts, + /// Primary rating and full external-ratings table. + Ratings, + /// Series cover image URL. + Cover, + /// Free-form tags. + Tags, + /// Genres. + Genres, + /// Age rating. + AgeRating, + /// Language and reading direction. + Classification, + /// Publisher and imprint. + Publisher, + /// Cross-references to other services and editorial links. + ExternalRefs, +} + +impl FieldGroup { + /// Snake_case identifier used in storage and over the wire. 
+ pub fn as_str(&self) -> &'static str { + match self { + FieldGroup::Identifiers => "identifiers", + FieldGroup::Descriptive => "descriptive", + FieldGroup::Status => "status", + FieldGroup::Counts => "counts", + FieldGroup::Ratings => "ratings", + FieldGroup::Cover => "cover", + FieldGroup::Tags => "tags", + FieldGroup::Genres => "genres", + FieldGroup::AgeRating => "age_rating", + FieldGroup::Classification => "classification", + FieldGroup::Publisher => "publisher", + FieldGroup::ExternalRefs => "external_refs", + } + } + + /// All groups, in display order. Used by the public field-group endpoint. + pub fn all() -> &'static [FieldGroup] { + &[ + FieldGroup::Identifiers, + FieldGroup::Descriptive, + FieldGroup::Status, + FieldGroup::Counts, + FieldGroup::Ratings, + FieldGroup::Cover, + FieldGroup::Tags, + FieldGroup::Genres, + FieldGroup::AgeRating, + FieldGroup::Classification, + FieldGroup::Publisher, + FieldGroup::ExternalRefs, + ] + } +} + +impl FromStr for FieldGroup { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "identifiers" => Ok(FieldGroup::Identifiers), + "descriptive" => Ok(FieldGroup::Descriptive), + "status" => Ok(FieldGroup::Status), + "counts" => Ok(FieldGroup::Counts), + "ratings" => Ok(FieldGroup::Ratings), + "cover" => Ok(FieldGroup::Cover), + "tags" => Ok(FieldGroup::Tags), + "genres" => Ok(FieldGroup::Genres), + "age_rating" => Ok(FieldGroup::AgeRating), + "classification" => Ok(FieldGroup::Classification), + "publisher" => Ok(FieldGroup::Publisher), + "external_refs" => Ok(FieldGroup::ExternalRefs), + other => Err(format!("Unknown field group '{}'", other)), + } + } +} + +/// Concrete field names (camelCase) covered by a single group. +/// +/// Field names match the strings the [`MetadataApplier`] checks via +/// `should_apply_field`. Adding a field here without a matching applier +/// branch silently does nothing — there's a unit test that asserts every +/// returned field is one the applier actually knows about. 
+/// +/// [`MetadataApplier`]: crate::services::metadata::MetadataApplier +pub fn fields_for_group(group: FieldGroup) -> &'static [&'static str] { + match group { + FieldGroup::Identifiers => &["title", "titleSort", "alternateTitles"], + FieldGroup::Descriptive => &["summary", "authors"], + FieldGroup::Status => &["status", "year"], + FieldGroup::Counts => &["totalVolumeCount", "totalChapterCount"], + FieldGroup::Ratings => &["rating", "externalRatings"], + FieldGroup::Cover => &["coverUrl"], + FieldGroup::Tags => &["tags"], + FieldGroup::Genres => &["genres"], + FieldGroup::AgeRating => &["ageRating"], + FieldGroup::Classification => &["language", "readingDirection"], + FieldGroup::Publisher => &["publisher"], + FieldGroup::ExternalRefs => &["externalIds", "externalLinks"], + } +} + +/// Expand a slice of group names into a deduplicated set of field names. +/// +/// Unknown group strings are silently ignored — callers that want strict +/// validation should call [`FieldGroup::from_str`] up front (the PATCH +/// endpoint will, in Phase 6). +/// +/// Returns `None` when both `groups` and `extras` are empty, matching the +/// "no filter, apply everything" semantics of +/// [`crate::services::metadata::ApplyOptions::fields_filter`]. +pub fn fields_for_groups>(groups: &[S], extras: &[S]) -> Option> { + if groups.is_empty() && extras.is_empty() { + return None; + } + let mut out: HashSet = HashSet::new(); + for g in groups { + if let Ok(group) = FieldGroup::from_str(g.as_ref()) { + for field in fields_for_group(group) { + out.insert((*field).to_string()); + } + } + } + for f in extras { + out.insert(f.as_ref().to_string()); + } + Some(out) +} + +/// Reverse lookup: given a field name, return the group it belongs to. +/// +/// Used by the UI to render "this field belongs to group X" in the +/// dry-run preview. Returns `None` for fields that aren't part of any +/// group (e.g. `customMetadata`, which the scheduled refresh doesn't +/// touch). 
+pub fn group_for_field(field: &str) -> Option { + for group in FieldGroup::all() { + if fields_for_group(*group).contains(&field) { + return Some(*group); + } + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Every field name returned by the resolver must be one the applier + /// actually checks. Otherwise we're silently no-op'ing the user's + /// selection. + /// + /// Source of truth: `should_apply_field("...")` call sites in + /// `services/metadata/apply.rs`. + const APPLIER_KNOWN_FIELDS: &[&str] = &[ + "title", + "titleSort", + "alternateTitles", + "summary", + "year", + "status", + "publisher", + "ageRating", + "language", + "readingDirection", + "totalVolumeCount", + "totalChapterCount", + "genres", + "tags", + "authors", + "externalLinks", + "externalIds", + "rating", + "externalRatings", + "coverUrl", + ]; + + #[test] + fn every_field_in_every_group_is_known_to_the_applier() { + for group in FieldGroup::all() { + for field in fields_for_group(*group) { + assert!( + APPLIER_KNOWN_FIELDS.contains(field), + "field '{}' (group {:?}) is not recognized by MetadataApplier", + field, + group + ); + } + } + } + + #[test] + fn from_str_round_trips() { + for group in FieldGroup::all() { + let s = group.as_str(); + let parsed = FieldGroup::from_str(s).expect("should parse"); + assert_eq!(parsed, *group); + } + } + + #[test] + fn from_str_rejects_unknown() { + assert!(FieldGroup::from_str("not_a_group").is_err()); + assert!(FieldGroup::from_str("").is_err()); + } + + #[test] + fn ratings_group_maps_to_rating_fields() { + let fields = fields_for_group(FieldGroup::Ratings); + assert_eq!(fields, &["rating", "externalRatings"]); + } + + #[test] + fn counts_group_includes_both_counts() { + let fields = fields_for_group(FieldGroup::Counts); + assert!(fields.contains(&"totalVolumeCount")); + assert!(fields.contains(&"totalChapterCount")); + } + + #[test] + fn fields_for_groups_returns_none_when_empty() { + let groups: Vec<&str> = vec![]; + let extras: 
Vec<&str> = vec![]; + assert!(fields_for_groups(&groups, &extras).is_none()); + } + + #[test] + fn fields_for_groups_expands_groups() { + let groups = ["ratings", "status"]; + let extras: Vec<&str> = vec![]; + let out = fields_for_groups(&groups, &extras).unwrap(); + assert!(out.contains("rating")); + assert!(out.contains("externalRatings")); + assert!(out.contains("status")); + assert!(out.contains("year")); + assert_eq!(out.len(), 4); + } + + #[test] + fn fields_for_groups_dedupes_overlapping_groups() { + // Identifiers and Descriptive don't overlap, but extras can. + let groups = ["identifiers"]; + let extras = ["title", "summary"]; + let out = fields_for_groups(&groups, &extras).unwrap(); + // identifiers brings title + titleSort + alternateTitles; extras add summary. + // 'title' is duplicated by extras; should appear once. + assert!(out.contains("title")); + assert!(out.contains("titleSort")); + assert!(out.contains("alternateTitles")); + assert!(out.contains("summary")); + assert_eq!(out.len(), 4); + } + + #[test] + fn fields_for_groups_silently_ignores_unknown_groups() { + let groups = ["ratings", "made_up_group"]; + let extras: Vec<&str> = vec![]; + let out = fields_for_groups(&groups, &extras).unwrap(); + assert_eq!(out.len(), 2); + assert!(out.contains("rating")); + assert!(out.contains("externalRatings")); + } + + #[test] + fn fields_for_groups_returns_only_extras_when_groups_empty() { + let groups: Vec<&str> = vec![]; + let extras = ["language", "publisher"]; + let out = fields_for_groups(&groups, &extras).unwrap(); + assert_eq!(out.len(), 2); + assert!(out.contains("language")); + assert!(out.contains("publisher")); + } + + #[test] + fn group_for_field_finds_group() { + assert_eq!(group_for_field("rating"), Some(FieldGroup::Ratings)); + assert_eq!( + group_for_field("externalRatings"), + Some(FieldGroup::Ratings) + ); + assert_eq!( + group_for_field("totalVolumeCount"), + Some(FieldGroup::Counts) + ); + assert_eq!(group_for_field("status"), 
Some(FieldGroup::Status)); + assert_eq!(group_for_field("year"), Some(FieldGroup::Status)); + assert_eq!(group_for_field("coverUrl"), Some(FieldGroup::Cover)); + } + + #[test] + fn group_for_field_returns_none_for_unknown_field() { + assert_eq!(group_for_field("notARealField"), None); + assert_eq!(group_for_field(""), None); + } + + #[test] + fn all_groups_are_listed_in_all() { + // FieldGroup::all() should match the variants in the enum. If you + // add a variant, add it to all() too. + let listed: HashSet = FieldGroup::all().iter().copied().collect(); + assert!(listed.contains(&FieldGroup::Identifiers)); + assert!(listed.contains(&FieldGroup::Descriptive)); + assert!(listed.contains(&FieldGroup::Status)); + assert!(listed.contains(&FieldGroup::Counts)); + assert!(listed.contains(&FieldGroup::Ratings)); + assert!(listed.contains(&FieldGroup::Cover)); + assert!(listed.contains(&FieldGroup::Tags)); + assert!(listed.contains(&FieldGroup::Genres)); + assert!(listed.contains(&FieldGroup::AgeRating)); + assert!(listed.contains(&FieldGroup::Classification)); + assert!(listed.contains(&FieldGroup::Publisher)); + assert!(listed.contains(&FieldGroup::ExternalRefs)); + assert_eq!(listed.len(), 12); + } +} diff --git a/src/services/metadata/mod.rs b/src/services/metadata/mod.rs index 28c8f0cc..979964d0 100644 --- a/src/services/metadata/mod.rs +++ b/src/services/metadata/mod.rs @@ -10,6 +10,7 @@ mod apply; mod book_apply; mod cover; +pub mod field_groups; pub mod preprocessing; pub mod refresh_config; pub mod refresh_planner; @@ -17,6 +18,8 @@ pub mod refresh_planner; pub use apply::{ApplyOptions, MetadataApplier, SkippedField}; pub use book_apply::{BookApplyOptions, BookMetadataApplier}; pub use cover::CoverService; +#[allow(unused_imports)] +pub use field_groups::{FieldGroup, fields_for_group, fields_for_groups, group_for_field}; pub use refresh_config::{MetadataRefreshConfig, parse_metadata_refresh_config}; // Re-exported for downstream phases (HTTP PATCH endpoint, 
per-provider // overrides). Allowed to be unused until those phases land. diff --git a/src/services/metadata/refresh_planner.rs b/src/services/metadata/refresh_planner.rs index da405c1f..0be16f71 100644 --- a/src/services/metadata/refresh_planner.rs +++ b/src/services/metadata/refresh_planner.rs @@ -237,22 +237,16 @@ async fn resolve_provider(db: &DatabaseConnection, provider: &str) -> Result Option> { - if config.field_groups.is_empty() && config.extra_fields.is_empty() { - return None; - } - let mut out: HashSet = HashSet::new(); - out.extend(config.field_groups.iter().cloned()); - out.extend(config.extra_fields.iter().cloned()); - Some(out) + super::field_groups::fields_for_groups(&config.field_groups, &config.extra_fields) } #[cfg(test)] @@ -580,16 +574,47 @@ mod tests { } #[test] - fn fields_filter_combines_groups_and_extras() { + fn fields_filter_expands_groups_to_concrete_fields() { + // "ratings" → ["rating", "externalRatings"] + // "status" → ["status", "year"] + // extras → ["language"] let cfg = MetadataRefreshConfig { field_groups: vec!["ratings".to_string(), "status".to_string()], extra_fields: vec!["language".to_string()], ..Default::default() }; let filter = fields_filter_from_config(&cfg).unwrap(); - assert!(filter.contains("ratings")); + assert!(filter.contains("rating")); + assert!(filter.contains("externalRatings")); assert!(filter.contains("status")); + assert!(filter.contains("year")); assert!(filter.contains("language")); - assert_eq!(filter.len(), 3); + assert_eq!(filter.len(), 5); + } + + #[test] + fn fields_filter_extras_only_passes_through() { + let cfg = MetadataRefreshConfig { + field_groups: vec![], + extra_fields: vec!["title".to_string(), "summary".to_string()], + ..Default::default() + }; + let filter = fields_filter_from_config(&cfg).unwrap(); + assert!(filter.contains("title")); + assert!(filter.contains("summary")); + assert_eq!(filter.len(), 2); + } + + #[test] + fn fields_filter_silently_drops_unknown_groups() { + let cfg = 
MetadataRefreshConfig { + field_groups: vec!["ratings".to_string(), "made_up_group".to_string()], + extra_fields: vec![], + ..Default::default() + }; + let filter = fields_filter_from_config(&cfg).unwrap(); + assert!(filter.contains("rating")); + assert!(filter.contains("externalRatings")); + assert_eq!(filter.len(), 2); } } diff --git a/src/tasks/handlers/plugin_auto_match.rs b/src/tasks/handlers/plugin_auto_match.rs index 90f8b6fd..a84cce27 100644 --- a/src/tasks/handlers/plugin_auto_match.rs +++ b/src/tasks/handlers/plugin_auto_match.rs @@ -931,6 +931,7 @@ impl TaskHandler for PluginAutoMatchHandler { fields_filter: None, thumbnail_service: self.thumbnail_service.clone(), event_broadcaster: event_broadcaster.cloned(), + dry_run: false, }; // Apply metadata diff --git a/src/tasks/handlers/refresh_library_metadata.rs b/src/tasks/handlers/refresh_library_metadata.rs index 4c29eee9..32182a85 100644 --- a/src/tasks/handlers/refresh_library_metadata.rs +++ b/src/tasks/handlers/refresh_library_metadata.rs @@ -365,6 +365,7 @@ async fn process_pair( fields_filter: fields_filter.cloned(), thumbnail_service: thumbnail_service.cloned(), event_broadcaster: event_broadcaster.cloned(), + dry_run: false, }; let apply_result = MetadataApplier::apply( diff --git a/tests/api/plugins.rs b/tests/api/plugins.rs index 1fa4a0f7..c9925e14 100644 --- a/tests/api/plugins.rs +++ b/tests/api/plugins.rs @@ -1778,6 +1778,37 @@ async fn test_preview_book_metadata_requires_auth() { assert_eq!(status, StatusCode::UNAUTHORIZED); } +#[tokio::test] +async fn test_apply_book_metadata_rejects_dry_run() { + // Book apply does not (yet) support dry-run. The handler must reject + // the request with 400 rather than silently performing a real apply. 
+ let (db, _temp_dir) = setup_test_db().await; + let state = create_test_auth_state(db.clone()).await; + let app = create_test_router(state.clone()).await; + let token = create_admin_and_token(&db, &state).await; + + let fake_book_id = uuid::Uuid::new_v4(); + let fake_plugin_id = uuid::Uuid::new_v4(); + let body = json!({ + "pluginId": fake_plugin_id.to_string(), + "externalId": "12345", + "dryRun": true, + }); + let request = post_json_request_with_auth( + &format!("/api/v1/books/{}/metadata/apply", fake_book_id), + &body, + &token, + ); + let (status, _): (StatusCode, Option) = + make_json_request(app, request).await; + + assert_eq!( + status, + StatusCode::BAD_REQUEST, + "book apply must reject dryRun=true" + ); +} + #[tokio::test] async fn test_apply_book_metadata_requires_auth() { let (db, _temp_dir) = setup_test_db().await; diff --git a/tests/services.rs b/tests/services.rs index 854dde56..bd90fa23 100644 --- a/tests/services.rs +++ b/tests/services.rs @@ -1,4 +1,5 @@ mod services { + mod apply_dry_run; mod book_metadata_apply; mod metadata_apply; } diff --git a/tests/services/apply_dry_run.rs b/tests/services/apply_dry_run.rs new file mode 100644 index 00000000..ae8531d4 --- /dev/null +++ b/tests/services/apply_dry_run.rs @@ -0,0 +1,422 @@ +//! Tests for `MetadataApplier::apply` with `dry_run = true`. +//! +//! Verifies that: +//! - DB writes are gated (row state unchanged after a dry-run apply). +//! - The returned `dry_run_report` enumerates the would-be changes. +//! - Lock and permission skips still surface in `skipped_fields`, same code +//! path as a real apply. +//! - The report matches what a real apply *would* have written for the +//! selected fields. 
+ +#[path = "../common/mod.rs"] +mod common; + +use chrono::Utc; +use codex::db::ScanningStrategy; +use codex::db::entities::plugins; +use codex::db::entities::series_metadata; +use codex::db::repositories::{LibraryRepository, SeriesMetadataRepository, SeriesRepository}; +use codex::services::metadata::{ApplyOptions, MetadataApplier}; +use codex::services::plugin::PluginSeriesMetadata; +use common::db::setup_test_db; +use sea_orm::{ActiveModelTrait, Set}; +use serde_json::json; +use std::collections::HashSet; +use uuid::Uuid; + +fn create_test_plugin() -> plugins::Model { + plugins::Model { + id: Uuid::new_v4(), + name: "test-plugin".to_string(), + display_name: "Test Plugin".to_string(), + description: None, + plugin_type: "system".to_string(), + command: "node".to_string(), + args: json!([]), + env: json!({}), + working_directory: None, + // Broad permission set so individual tests can pick & choose fields + // without per-field permission noise. Field locks are still tested + // explicitly. 
+ permissions: json!([ + "metadata:write:title", + "metadata:write:summary", + "metadata:write:status", + "metadata:write:total_volume_count", + "metadata:write:total_chapter_count", + "metadata:write:ratings", + "metadata:write:year", + ]), + scopes: json!(["series:detail"]), + library_ids: json!([]), + credentials: None, + credential_delivery: "env".to_string(), + config: json!({}), + manifest: None, + enabled: true, + health_status: "healthy".to_string(), + failure_count: 0, + last_failure_at: None, + last_success_at: None, + disabled_reason: None, + rate_limit_requests_per_minute: None, + search_query_template: None, + search_preprocessing_rules: None, + auto_match_conditions: None, + use_existing_external_id: true, + metadata_targets: None, + internal_config: None, + created_at: Utc::now(), + updated_at: Utc::now(), + created_by: None, + updated_by: None, + } +} + +fn empty_metadata() -> PluginSeriesMetadata { + PluginSeriesMetadata { + external_id: "test-123".to_string(), + external_url: "https://example.com/test-123".to_string(), + title: None, + alternate_titles: vec![], + summary: None, + status: None, + year: None, + total_volume_count: None, + total_chapter_count: None, + language: None, + age_rating: None, + reading_direction: None, + genres: vec![], + tags: vec![], + authors: vec![], + artists: vec![], + publisher: None, + cover_url: None, + banner_url: None, + rating: None, + external_ratings: vec![], + external_links: vec![], + external_ids: vec![], + } +} + +#[tokio::test] +async fn dry_run_does_not_write_to_database() { + let (db, _temp_dir) = setup_test_db().await; + + let library = + LibraryRepository::create(&db, "Test Library", "/test/path", ScanningStrategy::Default) + .await + .unwrap(); + let series = SeriesRepository::create(&db, library.id, "Original Title", None) + .await + .unwrap(); + + let original = SeriesMetadataRepository::get_by_series_id(&db, series.id) + .await + .unwrap() + .unwrap(); + + // Plugin payload changes title, 
summary, year, and status. + let plugin = create_test_plugin(); + let mut plugin_metadata = empty_metadata(); + plugin_metadata.title = Some("Plugin Title".to_string()); + plugin_metadata.summary = Some("Plugin summary".to_string()); + plugin_metadata.year = Some(2024); + plugin_metadata.status = Some(codex::db::entities::SeriesStatus::Ongoing); + + let options = ApplyOptions { + dry_run: true, + ..Default::default() + }; + + let result = MetadataApplier::apply( + &db, + series.id, + library.id, + &plugin, + &plugin_metadata, + Some(&original), + &options, + ) + .await + .unwrap(); + + // applied_fields tracks what *would* have been applied even in dry-run, + // because the dry-run still walks the same branches. + assert!(result.applied_fields.contains(&"title".to_string())); + assert!(result.applied_fields.contains(&"summary".to_string())); + assert!(result.applied_fields.contains(&"year".to_string())); + assert!(result.applied_fields.contains(&"status".to_string())); + + // Report contents. + let report = result.dry_run_report.expect("dry_run set ⇒ report present"); + let fields: HashSet<&str> = report.changes.iter().map(|c| c.field.as_str()).collect(); + assert!(fields.contains("title")); + assert!(fields.contains("summary")); + assert!(fields.contains("year")); + assert!(fields.contains("status")); + + // No DB write happened: the row matches the original. 
+ let after = SeriesMetadataRepository::get_by_series_id(&db, series.id) + .await + .unwrap() + .unwrap(); + assert_eq!(after.title, original.title); + assert_eq!(after.summary, original.summary); + assert_eq!(after.year, original.year); + assert_eq!(after.status, original.status); +} + +#[tokio::test] +async fn dry_run_real_apply_returns_no_report() { + let (db, _temp_dir) = setup_test_db().await; + + let library = + LibraryRepository::create(&db, "Test Library", "/test/path", ScanningStrategy::Default) + .await + .unwrap(); + let series = SeriesRepository::create(&db, library.id, "Original Title", None) + .await + .unwrap(); + + let original = SeriesMetadataRepository::get_by_series_id(&db, series.id) + .await + .unwrap() + .unwrap(); + + let plugin = create_test_plugin(); + let mut plugin_metadata = empty_metadata(); + plugin_metadata.summary = Some("Plugin summary".to_string()); + + let options = ApplyOptions::default(); // dry_run defaults to false + + let result = MetadataApplier::apply( + &db, + series.id, + library.id, + &plugin, + &plugin_metadata, + Some(&original), + &options, + ) + .await + .unwrap(); + + assert!(result.dry_run_report.is_none(), "real apply ⇒ no report"); + + // And the write actually happened. + let after = SeriesMetadataRepository::get_by_series_id(&db, series.id) + .await + .unwrap() + .unwrap(); + assert_eq!(after.summary, Some("Plugin summary".to_string())); +} + +#[tokio::test] +async fn dry_run_records_locked_fields_in_skipped_not_changes() { + let (db, _temp_dir) = setup_test_db().await; + + let library = + LibraryRepository::create(&db, "Test Library", "/test/path", ScanningStrategy::Default) + .await + .unwrap(); + let series = SeriesRepository::create(&db, library.id, "Original Title", None) + .await + .unwrap(); + + // Lock summary so the dry-run should skip-with-reason rather than record a change. 
+ let metadata = SeriesMetadataRepository::get_by_series_id(&db, series.id) + .await + .unwrap() + .unwrap(); + let mut active: series_metadata::ActiveModel = metadata.into(); + active.summary = Set(Some("Original summary".to_string())); + active.summary_lock = Set(true); + active.update(&db).await.unwrap(); + + let original = SeriesMetadataRepository::get_by_series_id(&db, series.id) + .await + .unwrap() + .unwrap(); + assert!(original.summary_lock); + + let plugin = create_test_plugin(); + let mut plugin_metadata = empty_metadata(); + plugin_metadata.summary = Some("Plugin summary".to_string()); + plugin_metadata.year = Some(2024); + + let options = ApplyOptions { + dry_run: true, + ..Default::default() + }; + + let result = MetadataApplier::apply( + &db, + series.id, + library.id, + &plugin, + &plugin_metadata, + Some(&original), + &options, + ) + .await + .unwrap(); + + // summary should be in skipped_fields (locked), NOT in dry_run_report.changes. + let summary_skipped = result.skipped_fields.iter().any(|s| s.field == "summary"); + assert!( + summary_skipped, + "locked field should appear in skipped_fields" + ); + + let report = result.dry_run_report.expect("dry_run set ⇒ report present"); + let summary_in_report = report.changes.iter().any(|c| c.field == "summary"); + assert!( + !summary_in_report, + "locked field should NOT appear in dry_run_report.changes" + ); + + // year (not locked) should appear in the report. + let year_in_report = report.changes.iter().any(|c| c.field == "year"); + assert!(year_in_report, "unlocked field should appear in report"); + + // DB unchanged for summary either way (locked AND dry-run). 
+ let after = SeriesMetadataRepository::get_by_series_id(&db, series.id) + .await + .unwrap() + .unwrap(); + assert_eq!(after.summary, Some("Original summary".to_string())); +} + +#[tokio::test] +async fn dry_run_filtered_by_fields_filter() { + let (db, _temp_dir) = setup_test_db().await; + + let library = + LibraryRepository::create(&db, "Test Library", "/test/path", ScanningStrategy::Default) + .await + .unwrap(); + let series = SeriesRepository::create(&db, library.id, "Original Title", None) + .await + .unwrap(); + + let original = SeriesMetadataRepository::get_by_series_id(&db, series.id) + .await + .unwrap() + .unwrap(); + + // Plugin returns multiple fields, but the filter only allows summary + year. + let plugin = create_test_plugin(); + let mut plugin_metadata = empty_metadata(); + plugin_metadata.title = Some("Plugin Title".to_string()); + plugin_metadata.summary = Some("Plugin summary".to_string()); + plugin_metadata.year = Some(2024); + plugin_metadata.status = Some(codex::db::entities::SeriesStatus::Ongoing); + + let mut filter = HashSet::new(); + filter.insert("summary".to_string()); + filter.insert("year".to_string()); + + let options = ApplyOptions { + dry_run: true, + fields_filter: Some(filter), + ..Default::default() + }; + + let result = MetadataApplier::apply( + &db, + series.id, + library.id, + &plugin, + &plugin_metadata, + Some(&original), + &options, + ) + .await + .unwrap(); + + let report = result.dry_run_report.expect("dry_run set ⇒ report present"); + let fields: HashSet<&str> = report.changes.iter().map(|c| c.field.as_str()).collect(); + + assert!(fields.contains("summary"), "summary must be in report"); + assert!(fields.contains("year"), "year must be in report"); + assert!( + !fields.contains("title"), + "title is filtered out, must not be in report" + ); + assert!( + !fields.contains("status"), + "status is filtered out, must not be in report" + ); + + // No DB write happened. 
+ let after = SeriesMetadataRepository::get_by_series_id(&db, series.id) + .await + .unwrap() + .unwrap(); + assert_eq!(after.title, original.title); + assert_eq!(after.summary, original.summary); + assert_eq!(after.status, original.status); +} + +#[tokio::test] +async fn dry_run_records_before_and_after_for_simple_field() { + let (db, _temp_dir) = setup_test_db().await; + + let library = + LibraryRepository::create(&db, "Test Library", "/test/path", ScanningStrategy::Default) + .await + .unwrap(); + let series = SeriesRepository::create(&db, library.id, "Original Title", None) + .await + .unwrap(); + + // Set a known summary on the row so we can assert `before`. + let metadata = SeriesMetadataRepository::get_by_series_id(&db, series.id) + .await + .unwrap() + .unwrap(); + let mut active: series_metadata::ActiveModel = metadata.into(); + active.summary = Set(Some("Original summary".to_string())); + active.update(&db).await.unwrap(); + + let original = SeriesMetadataRepository::get_by_series_id(&db, series.id) + .await + .unwrap() + .unwrap(); + + let plugin = create_test_plugin(); + let mut plugin_metadata = empty_metadata(); + plugin_metadata.summary = Some("New summary".to_string()); + + let options = ApplyOptions { + dry_run: true, + ..Default::default() + }; + + let result = MetadataApplier::apply( + &db, + series.id, + library.id, + &plugin, + &plugin_metadata, + Some(&original), + &options, + ) + .await + .unwrap(); + + let report = result.dry_run_report.expect("dry_run ⇒ report present"); + let summary_change = report + .changes + .iter() + .find(|c| c.field == "summary") + .expect("summary in report"); + + // before is wrapped because the column is `Option`. 
+ let before_json = summary_change.before.as_ref().expect("before set"); + assert_eq!(before_json, &json!("Original summary")); + assert_eq!(summary_change.after, json!("New summary")); +} diff --git a/tests/services/metadata_apply.rs b/tests/services/metadata_apply.rs index 61eca513..0e9d4d96 100644 --- a/tests/services/metadata_apply.rs +++ b/tests/services/metadata_apply.rs @@ -704,6 +704,7 @@ async fn test_apply_count_fields_filtered_out_by_allowlist() { fields_filter: Some(filter), thumbnail_service: None, event_broadcaster: None, + dry_run: false, }; let result = MetadataApplier::apply( diff --git a/web/openapi.json b/web/openapi.json index b32c8275..38c2959b 100644 --- a/web/openapi.json +++ b/web/openapi.json @@ -21692,6 +21692,21 @@ } } }, + "DryRunReportDto": { + "type": "object", + "description": "Dry-run preview attached to [`MetadataApplyResponse`] when the request\nset `dryRun = true`. Absent on real applies.", + "required": [ + "changes" + ], + "properties": { + "changes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FieldChangeDto" + } + } + } + }, "DuplicateGroup": { "type": "object", "description": "A group of duplicate books", @@ -22834,6 +22849,23 @@ "not_provided" ] }, + "FieldChangeDto": { + "type": "object", + "description": "One would-be field change recorded during a dry-run apply.\n\nMirrors `services::metadata::apply::FieldChange`, kept as a distinct DTO\nto keep the wire-format frozen even if internal types evolve.", + "required": [ + "field", + "after" + ], + "properties": { + "after": {}, + "before": { + "description": "Current value, where cheaply available. `null` for fields backed by\njoined tables (genres, tags, alternate titles, ratings, etc.)." + }, + "field": { + "type": "string" + } + } + }, "FieldOperator": { "oneOf": [ { @@ -26091,6 +26123,10 @@ "externalId" ], "properties": { + "dryRun": { + "type": "boolean", + "description": "When `true`, the call simulates the apply without writing to the\ndatabase. 
Returns the same `appliedFields`/`skippedFields` plus an\nextra `dryRunReport` showing every would-be change. Default `false`." + }, "externalId": { "type": "string", "description": "External ID from the plugin's search results" @@ -26129,6 +26165,17 @@ }, "description": "Fields that were applied" }, + "dryRunReport": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/DryRunReportDto", + "description": "Populated only when the request set `dryRun = true`. Each entry is a\nfield that *would* have been written." + } + ] + }, "message": { "type": "string", "description": "Message" @@ -34363,6 +34410,26 @@ } } }, + { + "type": "object", + "description": "Scheduled per-library metadata refresh.\n\nWalks the library's series, applies the per-library\n`metadata_refresh_config` (field groups, providers, matching strategy),\nand refreshes metadata via the existing `MetadataApplier`.", + "required": [ + "libraryId", + "type" + ], + "properties": { + "libraryId": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "refresh_library_metadata" + ] + } + } + }, { "type": "object", "description": "Generate thumbnails for books in a scope (library, series, specific books, or all)\nThis is a fan-out task that enqueues individual GenerateThumbnail tasks", diff --git a/web/src/types/api.generated.ts b/web/src/types/api.generated.ts index b81c767e..5944940c 100644 --- a/web/src/types/api.generated.ts +++ b/web/src/types/api.generated.ts @@ -8952,6 +8952,13 @@ export interface components { /** @description Whether the dismissal was recorded */ dismissed: boolean; }; + /** + * @description Dry-run preview attached to [`MetadataApplyResponse`] when the request + * set `dryRun = true`. Absent on real applies. 
+ */ + DryRunReportDto: { + changes: components["schemas"]["FieldChangeDto"][]; + }; /** @description A group of duplicate books */ DuplicateGroup: { /** @description List of book IDs that share this hash */ @@ -9452,6 +9459,21 @@ export interface components { * @enum {string} */ FieldApplyStatus: "will_apply" | "locked" | "no_permission" | "unchanged" | "not_provided"; + /** + * @description One would-be field change recorded during a dry-run apply. + * + * Mirrors `services::metadata::apply::FieldChange`, kept as a distinct DTO + * to keep the wire-format frozen even if internal types evolve. + */ + FieldChangeDto: { + after: unknown; + /** + * @description Current value, where cheaply available. `null` for fields backed by + * joined tables (genres, tags, alternate titles, ratings, etc.). + */ + before?: unknown; + field: string; + }; /** @description Operators for string and equality comparisons */ FieldOperator: { /** @enum {string} */ @@ -11098,6 +11120,12 @@ export interface components { MetadataAction: "search" | "get" | "match"; /** @description Request to apply metadata from a plugin */ MetadataApplyRequest: { + /** + * @description When `true`, the call simulates the apply without writing to the + * database. Returns the same `appliedFields`/`skippedFields` plus an + * extra `dryRunReport` showing every would-be change. Default `false`. 
+ */ + dryRun?: boolean; /** @description External ID from the plugin's search results */ externalId: string; /** @description Optional list of fields to apply (default: all applicable fields) */ @@ -11112,6 +11140,7 @@ export interface components { MetadataApplyResponse: { /** @description Fields that were applied */ appliedFields: string[]; + dryRunReport?: null | components["schemas"]["DryRunReportDto"]; /** @description Message */ message: string; /** @description Fields that were skipped (with reasons) */ @@ -15889,6 +15918,11 @@ export interface components { source: string; /** @enum {string} */ type: "refresh_metadata"; + } | { + /** Format: uuid */ + libraryId: string; + /** @enum {string} */ + type: "refresh_library_metadata"; } | { bookIds?: string[] | null; force?: boolean; From b27769db0bfdc00fee318401cd37160d8c232523 Mon Sep 17 00:00:00 2001 From: Sylvain Cau Date: Sun, 3 May 2026 12:07:22 -0700 Subject: [PATCH 04/12] feat(metadata): add MatchingStrategy and re-match path to scheduled refresh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce an explicit `MatchingStrategy::{ExistingExternalIdOnly, AllowReMatch}` enum on `ApplyOptions` so the scheduled refresh can opt into "don't re-match — only refresh series that already have a stored provider ID" behavior, while every existing manual-apply call site keeps its historical "search if no ID" semantics via the `AllowReMatch` default. The `RefreshLibraryMetadataHandler` now picks the strategy from `metadata_refresh_config.existing_source_ids_only`. Strict mode keeps the planner's plan-time gating; loose mode invokes a new `rematch_external_id` helper that calls `metadata/series/match`, then feeds the returned external ID into the existing `metadata/series/get` + `MetadataApplier` flow. A match miss is surfaced as a new `no_match_candidate` bucket on `RunSummary` so it stays distinguishable from the strict-mode `no_external_id` skip. 
A new `RefreshSkipReason` taxonomy module provides a stable, serde-friendly superset of the planner's narrower skip enum, with a `From` lift so planner reasons can be widened into the public taxonomy that the upcoming HTTP dry-run endpoint will surface. Manual-apply call sites in the v1 plugin handlers and `plugin_auto_match` were converted to `..Default::default()` so they pick up the new field without changing behavior. Tests cover the default-strategy guard, the loose-mode re-match branch, the skip-reason identifiers and serde shape, and the planner-to-public lift. --- src/api/routes/v1/handlers/plugin_actions.rs | 10 +- src/services/metadata/apply.rs | 54 ++++ src/services/metadata/mod.rs | 7 +- src/services/metadata/refresh_skip_reason.rs | 188 ++++++++++++ src/tasks/handlers/plugin_auto_match.rs | 5 +- .../handlers/refresh_library_metadata.rs | 282 +++++++++++++++--- tests/services/metadata_apply.rs | 1 + 7 files changed, 508 insertions(+), 39 deletions(-) create mode 100644 src/services/metadata/refresh_skip_reason.rs diff --git a/src/api/routes/v1/handlers/plugin_actions.rs b/src/api/routes/v1/handlers/plugin_actions.rs index d59a63b6..588e3891 100644 --- a/src/api/routes/v1/handlers/plugin_actions.rs +++ b/src/api/routes/v1/handlers/plugin_actions.rs @@ -1431,13 +1431,17 @@ pub async fn apply_series_metadata( .await .map_err(|e| ApiError::Internal(format!("Failed to get current metadata: {}", e)))?; - // Build apply options + // Build apply options. Manual API endpoints keep the historical + // `AllowReMatch` default to preserve existing behavior — the new + // `ExistingExternalIdOnly` strategy is opt-in via the scheduled + // refresh task. 
let dry_run = request.dry_run; let options = ApplyOptions { fields_filter: request.fields.map(|f| f.into_iter().collect()), thumbnail_service: Some(state.thumbnail_service.clone()), event_broadcaster: Some(state.event_broadcaster.clone()), dry_run, + ..Default::default() }; // Apply metadata using the shared service @@ -1654,12 +1658,14 @@ pub async fn auto_match_series_metadata( .await .map_err(|e| ApiError::Internal(format!("Failed to get current metadata: {}", e)))?; - // Build apply options (no field filtering for auto-match) + // Build apply options (no field filtering for auto-match). + // Manual auto-match preserves the historical `AllowReMatch` default. let options = ApplyOptions { fields_filter: None, // Apply all fields thumbnail_service: Some(state.thumbnail_service.clone()), event_broadcaster: Some(state.event_broadcaster.clone()), dry_run: false, + ..Default::default() }; // Apply metadata using the shared service diff --git a/src/services/metadata/apply.rs b/src/services/metadata/apply.rs index 28f5c7bb..ee3600d3 100644 --- a/src/services/metadata/apply.rs +++ b/src/services/metadata/apply.rs @@ -72,6 +72,31 @@ pub struct MetadataApplyResult { pub dry_run_report: Option, } +/// How a caller decides whether the applier should accept metadata for a +/// series with no stored external ID. +/// +/// The applier itself never re-matches; it just applies the metadata it's +/// handed. This enum is passed through as configuration for the *caller* +/// (typically a task handler) so it can decide whether to call +/// `metadata/series/match` or skip the series entirely. Carrying it in +/// [`ApplyOptions`] keeps the contract visible at every apply site without +/// putting matching logic inside the applier. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub enum MatchingStrategy { + /// Refresh only series that already have a stored external ID for the + /// chosen provider. Series without one are skipped (not re-matched). 
+ /// + /// This is the safe default for scheduled refreshes against a curated + /// library: the user picked the match once, the scheduler honors it. + ExistingExternalIdOnly, + /// Allow re-matching when no external ID is stored. The handler is + /// expected to call `metadata/series/match` and proceed with the best + /// result. This preserves the manual-apply contract that existed before + /// the scheduled-refresh feature. + #[default] + AllowReMatch, +} + /// Options for controlling metadata application behavior. #[derive(Clone, Default)] pub struct ApplyOptions { @@ -85,6 +110,11 @@ pub struct ApplyOptions { /// returned instead. Locks and permission checks still run — same code /// path, just gated writes. pub dry_run: bool, + /// Caller-provided matching strategy. Read by the handlers that drive + /// the applier; the applier itself doesn't branch on it. Defaults to + /// [`MatchingStrategy::AllowReMatch`] so existing manual-apply call + /// sites preserve their previous behavior. + pub matching_strategy: MatchingStrategy, } /// Service for applying plugin metadata to series. @@ -833,3 +863,27 @@ impl MetadataApplier { }) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn matching_strategy_default_is_allow_rematch() { + // Manual-apply call sites use `..Default::default()`; flipping the + // default would silently switch them to `ExistingExternalIdOnly`, + // which would skip series that don't have a stored external ID. + // Pin the default explicitly so future refactors are caught here. + assert_eq!(MatchingStrategy::default(), MatchingStrategy::AllowReMatch); + } + + #[test] + fn apply_options_default_uses_allow_rematch() { + // Same guard one level up: ApplyOptions::default carries the + // historical "search if no ID" behavior. 
+ let opts = ApplyOptions::default(); + assert_eq!(opts.matching_strategy, MatchingStrategy::AllowReMatch); + assert!(!opts.dry_run); + assert!(opts.fields_filter.is_none()); + } +} diff --git a/src/services/metadata/mod.rs b/src/services/metadata/mod.rs index 979964d0..f0c54bcb 100644 --- a/src/services/metadata/mod.rs +++ b/src/services/metadata/mod.rs @@ -14,8 +14,9 @@ pub mod field_groups; pub mod preprocessing; pub mod refresh_config; pub mod refresh_planner; +pub mod refresh_skip_reason; -pub use apply::{ApplyOptions, MetadataApplier, SkippedField}; +pub use apply::{ApplyOptions, MatchingStrategy, MetadataApplier, SkippedField}; pub use book_apply::{BookApplyOptions, BookMetadataApplier}; pub use cover::CoverService; #[allow(unused_imports)] @@ -27,6 +28,8 @@ pub use refresh_config::{MetadataRefreshConfig, parse_metadata_refresh_config}; pub use refresh_config::{MetadataRefreshConfigPatch, ProviderOverride}; #[allow(unused_imports)] pub use refresh_planner::{ - PlannedRefresh, RefreshPlan, RefreshPlanner, SkipReason as RefreshSkipReason, SkippedRefresh, + PlannedRefresh, RefreshPlan, RefreshPlanner, SkipReason as PlannerSkipReason, SkippedRefresh, fields_filter_from_config, }; +#[allow(unused_imports)] +pub use refresh_skip_reason::RefreshSkipReason; diff --git a/src/services/metadata/refresh_skip_reason.rs b/src/services/metadata/refresh_skip_reason.rs new file mode 100644 index 00000000..79838b48 --- /dev/null +++ b/src/services/metadata/refresh_skip_reason.rs @@ -0,0 +1,188 @@ +//! Stable skip-reason taxonomy for the scheduled metadata refresh. +//! +//! Used by the dry-run preview, the task summary JSON, and the planner. The +//! string forms (returned by [`RefreshSkipReason::as_str`]) are part of the +//! public API surface — they show up in HTTP responses and stored task +//! results. Don't rename them without a migration story. +//! +//! The planner currently owns its own narrower [`super::refresh_planner::SkipReason`] +//! 
enum that covers reasons knowable at planning time (no external ID, recently +//! synced, provider unavailable). This module is a *superset* that also names +//! reasons only knowable mid-run (all fields locked, all fields filtered out, +//! plugin call failed). Both enums use the same string identifiers so the +//! union counts in the task summary stay coherent. +//! +//! [`From`] is provided for the planner +//! reasons. Run-time reasons are constructed directly by the handler. + +use serde::{Deserialize, Serialize}; + +use super::refresh_planner::SkipReason as PlannerSkipReason; + +/// One stable reason a `(series, provider)` pair was skipped. +/// +/// Variants are flat and self-describing so the JSON form is friendly to +/// consume from the frontend. The discriminant strings (also returned by +/// [`Self::as_str`]) are stable. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum RefreshSkipReason { + /// `existing_source_ids_only = true` and the series has no stored + /// external ID for the provider. Surfaced by the planner. + NoExternalId, + /// `last_synced_at` for the (series, provider) pair is younger than + /// `skip_recently_synced_within_s`. Surfaced by the planner. + RecentlySynced, + /// Provider configuration references a plugin that isn't installed or + /// is disabled. Recorded once per plan rather than per series, but the + /// taxonomy lists it for completeness. + ProviderUnavailable, + /// Re-match attempt found no candidate above the confidence threshold. + /// Only reachable in `AllowReMatch` mode. + NoMatchCandidate, + /// The applier reported every field as locked (none written). The pair + /// was technically processed but nothing changed; surfacing it as a + /// distinct reason avoids inflating "succeeded" counts when in fact + /// the user's locks blocked the run. + AllFieldsLocked, + /// `fields_filter` excluded every field the provider returned. 
Distinct + /// from `AllFieldsLocked` because the cause is config, not user locks. + NoFieldsAfterFilter, + /// Plugin returned an error or timed out. The handler still increments + /// the `failed` counter; this variant exists so the dry-run preview can + /// label the row instead of dropping it silently. + PluginCallFailed, +} + +impl RefreshSkipReason { + /// Stable string identifier. Used as the JSON key in task-summary + /// `skipped` maps and as the value of the `kind` discriminant. + pub fn as_str(&self) -> &'static str { + match self { + Self::NoExternalId => "no_external_id", + Self::RecentlySynced => "recently_synced", + Self::ProviderUnavailable => "provider_unavailable", + Self::NoMatchCandidate => "no_match_candidate", + Self::AllFieldsLocked => "all_fields_locked", + Self::NoFieldsAfterFilter => "no_fields_after_filter", + Self::PluginCallFailed => "plugin_call_failed", + } + } +} + +impl From for RefreshSkipReason { + fn from(value: PlannerSkipReason) -> Self { + match value { + PlannerSkipReason::NoExternalId => Self::NoExternalId, + PlannerSkipReason::RecentlySynced { .. } => Self::RecentlySynced, + PlannerSkipReason::ProviderUnavailable { .. } => Self::ProviderUnavailable, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn as_str_covers_every_variant() { + // Every variant returns a non-empty stable identifier. 
+ for r in [ + RefreshSkipReason::NoExternalId, + RefreshSkipReason::RecentlySynced, + RefreshSkipReason::ProviderUnavailable, + RefreshSkipReason::NoMatchCandidate, + RefreshSkipReason::AllFieldsLocked, + RefreshSkipReason::NoFieldsAfterFilter, + RefreshSkipReason::PluginCallFailed, + ] { + assert!(!r.as_str().is_empty()); + } + } + + #[test] + fn no_external_id_has_stable_identifier() { + assert_eq!(RefreshSkipReason::NoExternalId.as_str(), "no_external_id"); + } + + #[test] + fn recently_synced_has_stable_identifier() { + assert_eq!( + RefreshSkipReason::RecentlySynced.as_str(), + "recently_synced" + ); + } + + #[test] + fn provider_unavailable_has_stable_identifier() { + assert_eq!( + RefreshSkipReason::ProviderUnavailable.as_str(), + "provider_unavailable" + ); + } + + #[test] + fn no_match_candidate_has_stable_identifier() { + assert_eq!( + RefreshSkipReason::NoMatchCandidate.as_str(), + "no_match_candidate" + ); + } + + #[test] + fn all_fields_locked_has_stable_identifier() { + assert_eq!( + RefreshSkipReason::AllFieldsLocked.as_str(), + "all_fields_locked" + ); + } + + #[test] + fn no_fields_after_filter_has_stable_identifier() { + assert_eq!( + RefreshSkipReason::NoFieldsAfterFilter.as_str(), + "no_fields_after_filter" + ); + } + + #[test] + fn plugin_call_failed_has_stable_identifier() { + assert_eq!( + RefreshSkipReason::PluginCallFailed.as_str(), + "plugin_call_failed" + ); + } + + #[test] + fn from_planner_no_external_id() { + assert_eq!( + RefreshSkipReason::from(PlannerSkipReason::NoExternalId), + RefreshSkipReason::NoExternalId + ); + } + + #[test] + fn from_planner_recently_synced_drops_timestamp() { + let r = RefreshSkipReason::from(PlannerSkipReason::RecentlySynced { + last_synced_at: chrono::Utc::now(), + }); + assert_eq!(r, RefreshSkipReason::RecentlySynced); + } + + #[test] + fn from_planner_provider_unavailable_drops_provider() { + let r = RefreshSkipReason::from(PlannerSkipReason::ProviderUnavailable { + provider: "plugin:gone".to_string(), 
+ }); + assert_eq!(r, RefreshSkipReason::ProviderUnavailable); + } + + #[test] + fn serde_round_trip_uses_kind_discriminant() { + let r = RefreshSkipReason::NoExternalId; + let json = serde_json::to_value(&r).unwrap(); + assert_eq!(json, serde_json::json!({"kind": "no_external_id"})); + let back: RefreshSkipReason = serde_json::from_value(json).unwrap(); + assert_eq!(back, r); + } +} diff --git a/src/tasks/handlers/plugin_auto_match.rs b/src/tasks/handlers/plugin_auto_match.rs index a84cce27..ac565df5 100644 --- a/src/tasks/handlers/plugin_auto_match.rs +++ b/src/tasks/handlers/plugin_auto_match.rs @@ -926,12 +926,15 @@ impl TaskHandler for PluginAutoMatchHandler { let current_metadata = SeriesMetadataRepository::get_by_series_id(db, series_id).await?; - // Build apply options + // Build apply options. Auto-match preserves the historical + // `AllowReMatch` default — the matching decision was made just + // above, so the field is informational here. let options = ApplyOptions { fields_filter: None, thumbnail_service: self.thumbnail_service.clone(), event_broadcaster: event_broadcaster.cloned(), dry_run: false, + ..Default::default() }; // Apply metadata diff --git a/src/tasks/handlers/refresh_library_metadata.rs b/src/tasks/handlers/refresh_library_metadata.rs index 32182a85..1343fc52 100644 --- a/src/tasks/handlers/refresh_library_metadata.rs +++ b/src/tasks/handlers/refresh_library_metadata.rs @@ -7,19 +7,22 @@ //! [`MetadataApplier`]. Per-pair errors are isolated: one failure increments //! a counter and the handler keeps going. //! -//! ## Phase scope (Phase 2) +//! ## Matching strategy //! -//! - Only the **`existing_source_ids_only = true`** path is wired. The -//! [`RefreshPlanner`] does the gating, so the handler never sees a -//! no-external-id pair when strict mode is on. -//! - When strict mode is off, the planner still returns pairs without an -//! external ID. Phase 2 logs a warning and skips them — Phase 4 will -//! 
introduce a real `MatchingStrategy` that lets the handler call -//! `metadata/series/match` to re-match. -//! - Field-group resolution is the simple union returned by -//! [`fields_filter_from_config`]. Phase 3 ships the proper resolver that -//! expands group names like `"ratings"` to the underlying field names -//! the applier understands. +//! Driven by [`MetadataRefreshConfig::existing_source_ids_only`]: +//! +//! - `true` ⇒ [`MatchingStrategy::ExistingExternalIdOnly`]: the +//! [`RefreshPlanner`] gates pairs at planning time, so the handler never +//! sees a no-external-id pair. Skips count as `no_external_id`. +//! - `false` ⇒ [`MatchingStrategy::AllowReMatch`]: the planner returns +//! no-external-id pairs and the handler calls `metadata/series/match` +//! for each one. A successful match is then fetched via +//! `metadata/series/get` and applied like any other pair. A miss is +//! recorded as `no_match_candidate` (distinct from `no_external_id`, +//! which only applies in strict mode). +//! +//! Field-group resolution lives in +//! [`crate::services::metadata::field_groups::fields_for_groups`] (Phase 3). 
use anyhow::{Context, Result}; use sea_orm::DatabaseConnection; @@ -31,15 +34,16 @@ use tracing::{debug, error, info, warn}; use crate::db::entities::tasks; use crate::db::repositories::{ LibraryRepository, PluginsRepository, SeriesExternalIdRepository, SeriesMetadataRepository, + SeriesRepository, }; use crate::events::{EntityChangeEvent, EntityEvent, EventBroadcaster, TaskProgressEvent}; use crate::services::ThumbnailService; use crate::services::metadata::refresh_planner::{ PlannedRefresh, RefreshPlan, RefreshPlanner, fields_filter_from_config, }; -use crate::services::metadata::{ApplyOptions, MetadataApplier}; +use crate::services::metadata::{ApplyOptions, MatchingStrategy, MetadataApplier}; use crate::services::plugin::PluginManager; -use crate::services::plugin::protocol::MetadataGetParams; +use crate::services::plugin::protocol::{MetadataGetParams, MetadataMatchParams}; use crate::tasks::handlers::TaskHandler; use crate::tasks::types::TaskResult; @@ -61,6 +65,10 @@ struct RunSummary { skipped_no_external_id: u32, skipped_recently_synced: u32, skipped_provider_unavailable: u32, + /// Loose-mode `match_series` returned no candidate above the + /// confidence floor, so the pair was skipped. Distinct from + /// `no_external_id` (which is the strict-mode skip reason). + skipped_no_match_candidate: u32, fields_applied_total: u32, } @@ -78,6 +86,7 @@ impl RunSummary { "no_external_id": self.skipped_no_external_id, "recently_synced": self.skipped_recently_synced, "provider_unavailable": self.skipped_provider_unavailable, + "no_match_candidate": self.skipped_no_match_candidate, }, "fields_applied_total": self.fields_applied_total, "unresolved_providers": unresolved_providers, @@ -223,6 +232,14 @@ impl TaskHandler for RefreshLibraryMetadataHandler { let fields_filter = fields_filter_from_config(&config); let library_name = library.name.clone(); + // Mirror the config toggle into the strategy enum so future + // call sites that key off `MatchingStrategy` (e.g. 
dry-run + // preview, manual API endpoints) share the same vocabulary. + let matching_strategy = if config.existing_source_ids_only { + MatchingStrategy::ExistingExternalIdOnly + } else { + MatchingStrategy::AllowReMatch + }; for (idx, planned) in plan.planned.iter().enumerate() { let pair_outcome = process_pair( @@ -233,6 +250,7 @@ impl TaskHandler for RefreshLibraryMetadataHandler { self.thumbnail_service.as_ref(), event_broadcaster, self.plugin_manager.as_ref(), + matching_strategy, ) .await; @@ -241,17 +259,26 @@ impl TaskHandler for RefreshLibraryMetadataHandler { summary.succeeded += 1; summary.fields_applied_total += applied as u32; } - Err(SkipBecauseNoExternalId) => { - // Loose-mode planner returns these; Phase 4 will - // convert this into a real re-match path. + Err(PairError::NoExternalId) => { + // Reached only in `ExistingExternalIdOnly` strategy + // when somehow a no-id pair slipped past the + // planner (defense in depth — shouldn't happen). warn!( - "Task {}: Skipping series {} for plugin {} — no external ID and \ - re-matching is not yet implemented (Phase 4)", + "Task {}: Skipping series {} for plugin {} — no external ID under \ + ExistingExternalIdOnly strategy", task.id, planned.series_id, planned.plugin.name ); summary.skipped_no_external_id += 1; } - Err(SkipBecauseFailed(err)) => { + Err(PairError::NoMatchCandidate) => { + // Loose-mode re-match found nothing usable. 
+ info!( + "Task {}: No match candidate for series {} via plugin {}; skipping", + task.id, planned.series_id, planned.plugin.name + ); + summary.skipped_no_match_candidate += 1; + } + Err(PairError::Failed(err)) => { summary.failed += 1; error!( "Task {}: Failed refresh for series {} via plugin {}: {:#}", @@ -288,7 +315,8 @@ impl TaskHandler for RefreshLibraryMetadataHandler { summary.failed, summary.skipped_no_external_id + summary.skipped_recently_synced - + summary.skipped_provider_unavailable, + + summary.skipped_provider_unavailable + + summary.skipped_no_match_candidate, ); Ok(TaskResult::success_with_data( @@ -302,13 +330,18 @@ impl TaskHandler for RefreshLibraryMetadataHandler { /// Fine-grained outcome for a single `(series, provider)` pair. /// /// Wrapping the `Result` makes the handler loop self-documenting and lets -/// us distinguish "planner gave us a no-ID pair" from "the call failed". +/// us distinguish strict-mode skips, loose-mode misses, and real failures. enum PairError { + /// Strict mode (`ExistingExternalIdOnly`) but the pair has no stored + /// external ID. Should normally be filtered by the planner, but kept + /// as a defensive branch. NoExternalId, + /// Loose mode (`AllowReMatch`) called `metadata/series/match` and got + /// no usable candidate. + NoMatchCandidate, + /// Plugin call or apply failed. Failed(anyhow::Error), } -use PairError::Failed as SkipBecauseFailed; -use PairError::NoExternalId as SkipBecauseNoExternalId; /// Process one planned pair. Returns the number of fields applied on /// success, or a typed reason for non-success. @@ -324,15 +357,32 @@ async fn process_pair( thumbnail_service: Option<&Arc>, event_broadcaster: Option<&Arc>, plugin_manager: &PluginManager, + matching_strategy: MatchingStrategy, ) -> Result { - // Phase 2: only the existing-ID path is wired. 
- let Some(external_id_record) = planned.existing_external_id.as_ref() else { - return Err(PairError::NoExternalId); - }; - let external_id = external_id_record.external_id.clone(); let plugin = &planned.plugin; - // 1. Fetch metadata from the plugin. + // 1. Resolve which external ID to fetch. + // + // Strict mode: the planner already filtered no-id pairs, so + // `existing_external_id` is always Some here. The defensive branch + // returns `NoExternalId` if that ever changes. + // + // Loose mode: when there's no stored ID, call `metadata/series/match` + // with the series' title to find one. A miss is `NoMatchCandidate`. + let external_id = if let Some(record) = planned.existing_external_id.as_ref() { + record.external_id.clone() + } else { + match matching_strategy { + MatchingStrategy::ExistingExternalIdOnly => { + return Err(PairError::NoExternalId); + } + MatchingStrategy::AllowReMatch => { + rematch_external_id(db, planned, plugin_manager).await? + } + } + }; + + // 2. Fetch metadata from the plugin. let get_params = MetadataGetParams { external_id: external_id.clone(), }; @@ -356,7 +406,7 @@ async fn process_pair( } }; - // 2. Apply via the shared MetadataApplier. + // 3. Apply via the shared MetadataApplier. let current_metadata = SeriesMetadataRepository::get_by_series_id(db, planned.series_id) .await .map_err(|e| PairError::Failed(e.context("Failed to load current metadata")))?; @@ -366,6 +416,7 @@ async fn process_pair( thumbnail_service: thumbnail_service.cloned(), event_broadcaster: event_broadcaster.cloned(), dry_run: false, + matching_strategy, }; let apply_result = MetadataApplier::apply( @@ -387,8 +438,10 @@ async fn process_pair( let applied_count = apply_result.applied_fields.len(); - // 3. Bump `last_synced_at` on the external-id row so the recency + // 4. Bump `last_synced_at` on the external-id row so the recency // guard works on the next run. `external_url` is preserved. 
+ // For the loose-mode re-match path, this is also where the freshly + // discovered external_id gets persisted for the next run. let external_url = plugin_metadata.external_url.clone(); if let Err(e) = SeriesExternalIdRepository::upsert_for_plugin( db, @@ -406,7 +459,7 @@ async fn process_pair( ); } - // 4. Emit a per-series metadata-updated event so the frontend + // 5. Emit a per-series metadata-updated event so the frontend // invalidates its cache. Reuses the same event the manual // apply path emits (see plugin_auto_match.rs). if applied_count > 0 @@ -423,7 +476,7 @@ async fn process_pair( )); } - // 5. Best-effort plugin success bookkeeping. The plugin manager + // 6. Best-effort plugin success bookkeeping. The plugin manager // already records success on its own in the happy path, but we // keep this for parity with plugin_auto_match. if let Err(e) = PluginsRepository::record_success(db, plugin.id).await { @@ -433,6 +486,83 @@ async fn process_pair( Ok(applied_count) } +/// Loose-mode helper: call `metadata/series/match` to find a candidate +/// external ID for a series with no stored mapping. +/// +/// The match call uses the series' metadata title (when set) and falls back +/// to `series.name`, plus the metadata year when one is stored. Author hints +/// are intentionally omitted — they're available to the manual flow via +/// `series_context`, but the scheduled refresh path keeps the call shape +/// minimal so misconfigured matchers don't accidentally narrow results. +/// +/// Returns the external ID on success, [`PairError::NoMatchCandidate`] when +/// the plugin returned `None`, or [`PairError::Failed`] on plugin error. +async fn rematch_external_id( + db: &DatabaseConnection, + planned: &PlannedRefresh, + plugin_manager: &PluginManager, +) -> Result { + let plugin = &planned.plugin; + + let series = SeriesRepository::get_by_id(db, planned.series_id) + .await + .map_err(|e| { + PairError::Failed(e.context(format!( + "Failed to load series {} for re-match", + planned.series_id + ))) + })? 
+ .ok_or_else(|| { + PairError::Failed(anyhow::anyhow!( + "Series {} disappeared during refresh", + planned.series_id + )) + })?; + + let metadata = SeriesMetadataRepository::get_by_series_id(db, planned.series_id) + .await + .map_err(|e| { + PairError::Failed(e.context("Failed to load current metadata for re-match")) + })?; + + let title = metadata + .as_ref() + .map(|m| m.title.clone()) + .filter(|t| !t.is_empty()) + .unwrap_or_else(|| series.name.clone()); + let year = metadata.as_ref().and_then(|m| m.year); + + let match_params = MetadataMatchParams { + title, + year, + author: None, + }; + + let match_fut = plugin_manager.match_series(plugin.id, match_params); + let match_result = match tokio::time::timeout(PER_PAIR_TIMEOUT, match_fut).await { + Ok(Ok(r)) => r, + Ok(Err(e)) => { + return Err(PairError::Failed(anyhow::Error::new(e).context(format!( + "Plugin '{}' failed to re-match series {}", + plugin.name, planned.series_id + )))); + } + Err(_elapsed) => { + return Err(PairError::Failed(anyhow::anyhow!( + "Plugin '{}' timed out after {}s re-matching series {}", + plugin.name, + PER_PAIR_TIMEOUT.as_secs(), + planned.series_id + ))); + } + }; + + match match_result { + Some(r) => Ok(r.external_id), + None => Err(PairError::NoMatchCandidate), + } +} + #[cfg(test)] mod tests { use super::*; @@ -475,6 +605,7 @@ mod tests { assert_eq!(json["skipped"]["no_external_id"], 0); assert_eq!(json["skipped"]["recently_synced"], 0); assert_eq!(json["skipped"]["provider_unavailable"], 0); + assert_eq!(json["skipped"]["no_match_candidate"], 0); assert_eq!(json["fields_applied_total"], 0); assert!(json["unresolved_providers"].as_array().unwrap().is_empty()); } @@ -487,6 +618,7 @@ mod tests { skipped_no_external_id: 2, skipped_recently_synced: 1, skipped_provider_unavailable: 0, + skipped_no_match_candidate: 1, fields_applied_total: 12, }; let json = s.into_json(7, vec!["plugin:gone".to_string()]); @@ -496,6 +628,7 @@ mod tests { assert_eq!(json["fields_applied_total"], 12); 
assert_eq!(json["skipped"]["no_external_id"], 2); assert_eq!(json["skipped"]["recently_synced"], 1); + assert_eq!(json["skipped"]["no_match_candidate"], 1); assert_eq!(json["unresolved_providers"][0], "plugin:gone"); } @@ -663,6 +796,87 @@ mod tests { assert_eq!(data["failed"], 0); } + /// Phase 4 contract: when `existing_source_ids_only = false`, the + /// planner stops gating no-id pairs and the handler is responsible for + /// invoking the re-match path. The handler thus *attempts* to call + /// `metadata/series/match` for every unmatched series. Without a real + /// plugin process the call fails, and the `failed` counter increments + /// rather than `skipped_no_external_id` — proving loose mode is wired. + /// This is the negative-space evidence that strict mode and loose mode + /// take distinct branches. + #[tokio::test] + async fn handler_attempts_rematch_in_loose_mode() { + setup_test_encryption_key(); + let db = setup_test_db().await; + + let library = LibraryRepository::create( + &db, + "lib-loose", + "/tmp/lib-loose", + ScanningStrategy::Default, + ) + .await + .unwrap(); + // One unmatched series. + let _series = SeriesRepository::create(&db, library.id, "Series Loose", None) + .await + .unwrap(); + // Enabled plugin without a real process — match_series will fail. 
+ let _plugin = PluginsRepository::create( + &db, + "mangabaka", + "MangaBaka", + None, + "system", + "node", + vec!["dist/index.js".to_string()], + vec![], + None, + vec![PluginPermission::MetadataWriteSummary], + vec![PluginScope::SeriesDetail], + vec![], + None, + "env", + None, + true, + None, + None, + ) + .await + .unwrap(); + + let cfg = MetadataRefreshConfig { + enabled: true, + providers: vec!["plugin:mangabaka".to_string()], + existing_source_ids_only: false, + skip_recently_synced_within_s: 0, + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library.id, &cfg) + .await + .unwrap(); + + let task = enqueue_and_load( + &db, + TaskType::RefreshLibraryMetadata { + library_id: library.id, + }, + ) + .await; + let handler = make_handler(&db); + let result = handler.handle(&task, &db, None).await.unwrap(); + + assert!(result.success); + let data = result.data.unwrap(); + // Planner included the series (loose mode keeps no-id pairs). + assert_eq!(data["planned"], 1); + // The plugin call failed (no real process), so the pair was + // counted as `failed` — NOT as `skipped_no_external_id`. 
+ assert_eq!(data["failed"], 1); + assert_eq!(data["skipped"]["no_external_id"], 0); + assert_eq!(data["succeeded"], 0); + } + #[tokio::test] async fn handler_errors_when_library_missing() { let db = setup_test_db().await; diff --git a/tests/services/metadata_apply.rs b/tests/services/metadata_apply.rs index 0e9d4d96..f622c607 100644 --- a/tests/services/metadata_apply.rs +++ b/tests/services/metadata_apply.rs @@ -705,6 +705,7 @@ async fn test_apply_count_fields_filtered_out_by_allowlist() { thumbnail_service: None, event_broadcaster: None, dry_run: false, + ..Default::default() }; let result = MetadataApplier::apply( From 4d62d69a5dca875c8787c98e1a8a33706d173ba8 Mon Sep 17 00:00:00 2001 From: Sylvain Cau Date: Sun, 3 May 2026 12:28:48 -0700 Subject: [PATCH 05/12] feat(scheduler): wire per-library scheduled metadata refresh Register a cron entry per library whose `metadata_refresh_config.enabled = true`, firing `RefreshLibraryMetadata { library_id }` on each tick. The new loader runs from `start()` so `reload_schedules()` rebuilds refresh entries alongside the scan ones with no extra wiring on the API side. The cron-job closure runs a skip-if-already-running guard before enqueuing: if a `pending` or `processing` `refresh_library_metadata` task already exists for the same library, the firing is dropped with a warn log. This is belt and suspenders on top of `TaskRepository::enqueue` deduplication so operators see an explicit log line when a cron tick fires while the previous run is still draining. Per-library config errors (invalid cron, unparseable config, missing library row) log a warning and skip just that library instead of aborting scheduler boot. Per-library timezone overrides fall back to the server default when parsing fails, matching the existing `add_library_schedule` behavior. 
Adds integration tests for enabled/disabled configs, per-library timezone, invalid cron tolerance, multi-library coexistence, and reload pickup, plus unit tests on the in-flight detector. --- src/scheduler/mod.rs | 235 ++++++++++++++++++++++++++++++++++++++- tests/scheduler.rs | 259 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 493 insertions(+), 1 deletion(-) diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index d393e12a..db9eff24 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -1,12 +1,14 @@ use anyhow::{Context, Result}; use chrono_tz::Tz; -use sea_orm::DatabaseConnection; +use sea_orm::{ColumnTrait, DatabaseConnection, EntityTrait, PaginatorTrait, QueryFilter}; use tokio_cron_scheduler::{Job, JobScheduler}; use tracing::{debug, error, info, warn}; use uuid::Uuid; +use crate::db::entities::{prelude::Tasks, tasks}; use crate::db::repositories::{LibraryRepository, TaskRepository}; use crate::scanner::{ScanMode, ScanningConfig}; +use crate::services::metadata::parse_metadata_refresh_config; use crate::services::settings::SettingsService; use crate::tasks::types::TaskType; use crate::utils::cron::{normalize_cron_expression, parse_timezone}; @@ -57,6 +59,9 @@ impl Scheduler { // Load library scan schedules self.load_library_schedules().await?; + // Load per-library scheduled metadata-refresh entries + self.load_library_metadata_refresh_schedules().await?; + // Load deduplication schedule self.load_deduplication_schedule().await?; @@ -536,6 +541,145 @@ impl Scheduler { Ok(()) } + /// Load per-library scheduled metadata-refresh entries. + /// + /// Mirrors [`Self::load_library_schedules`] but reads `metadata_refresh_config` + /// instead of `scanning_config` and enqueues `RefreshLibraryMetadata` tasks. + /// Libraries with the schedule disabled (or missing config) get no entry. 
+ async fn load_library_metadata_refresh_schedules(&mut self) -> Result<()> { + let libraries = LibraryRepository::list_all(&self.db).await?; + + for library in libraries { + if let Err(e) = self.add_library_metadata_refresh_schedule(library.id).await { + warn!( + "Failed to add metadata refresh schedule for library {}: {}", + library.name, e + ); + } + } + + Ok(()) + } + + /// Resolve the timezone for a library metadata-refresh job. + /// + /// Priority: refresh-config `timezone` > server default timezone. + fn resolve_metadata_refresh_timezone(&self, tz_str: Option<&str>) -> Tz { + if let Some(tz_str) = tz_str { + match parse_timezone(tz_str) { + Ok(tz) => return tz, + Err(e) => { + warn!( + "Invalid metadata refresh timezone '{}': {}. Using server default ({})", + tz_str, e, self.default_tz + ); + } + } + } + self.default_tz + } + + /// Add or update a library's scheduled metadata-refresh cron entry. + /// + /// Reads `libraries.metadata_refresh_config`. When `enabled = true`, registers + /// a cron job that enqueues [`TaskType::RefreshLibraryMetadata`] with a + /// skip-if-already-running guard so a long previous run cannot pile up + /// duplicates if the next tick fires before it finishes. + pub async fn add_library_metadata_refresh_schedule(&mut self, library_id: Uuid) -> Result<()> { + let library = LibraryRepository::get_by_id(&self.db, library_id) + .await? 
+ .ok_or_else(|| anyhow::anyhow!("Library not found: {}", library_id))?; + + let config = match parse_metadata_refresh_config(library.metadata_refresh_config.as_deref()) + { + Ok(c) => c, + Err(e) => { + warn!( + "Library {} has invalid metadata_refresh_config ({}); skipping schedule", + library.name, e + ); + return Ok(()); + } + }; + + if !config.enabled { + debug!( + "Skipping library {} - scheduled metadata refresh disabled", + library.name + ); + return Ok(()); + } + + let cron_schedule = normalize_cron_expression(&config.cron_schedule) + .context("Invalid cron expression for library metadata refresh schedule")?; + + let tz = self.resolve_metadata_refresh_timezone(config.timezone.as_deref()); + + let db = self.db.clone(); + let library_name = library.name.clone(); + + let job = Job::new_async_tz(cron_schedule.as_str(), tz, move |_uuid, _lock| { + let db = db.clone(); + let library_name = library_name.clone(); + + Box::pin(async move { + // Skip-if-already-running guard: if a refresh task for this + // library is still pending or processing, drop this firing + // rather than letting cron ticks pile up duplicate work. 
+ match has_active_refresh_for_library(&db, library_id).await { + Ok(true) => { + warn!( + "Skipping scheduled metadata refresh for library {}: previous task still running", + library_name + ); + return; + } + Ok(false) => {} + Err(e) => { + warn!( + "Failed to check for in-flight refresh task for library {}: {}; proceeding", + library_name, e + ); + } + } + + info!( + "Triggering scheduled metadata refresh for library {}", + library_name + ); + + let task_type = TaskType::RefreshLibraryMetadata { library_id }; + match TaskRepository::enqueue(&db, task_type, None).await { + Ok(_) => { + debug!( + "Successfully enqueued scheduled metadata refresh for library {}", + library_name + ); + } + Err(e) => { + error!( + "Failed to enqueue scheduled metadata refresh for library {}: {}", + library_name, e + ); + } + } + }) + }) + .context("Failed to create metadata refresh cron job")?; + + self.scheduler + .add(job) + .await + .context("Failed to add metadata refresh job to scheduler")?; + + info!( + "Added metadata refresh schedule for library {} with cron '{}' (timezone: {})", + library.name, cron_schedule, tz + ); + + Ok(()) + } + /// Reload all schedules (useful when libraries or settings are updated) pub async fn reload_schedules(&mut self) -> Result<()> { info!("Reloading all schedules"); @@ -558,12 +702,101 @@ impl Scheduler { } } +/// Whether an active (`pending` or `processing`) `refresh_library_metadata` +/// task already exists for the given library. +/// +/// Used by the per-library refresh cron job to avoid stacking duplicate work +/// when a previous run hasn't drained yet. 
+async fn has_active_refresh_for_library(db: &DatabaseConnection, library_id: Uuid) -> Result { + let count = Tasks::find() + .filter(tasks::Column::TaskType.eq("refresh_library_metadata")) + .filter(tasks::Column::LibraryId.eq(library_id)) + .filter(tasks::Column::Status.is_in(["pending", "processing"])) + .count(db) + .await + .context("Failed to count active refresh tasks")?; + Ok(count > 0) +} + #[cfg(test)] mod tests { + use super::*; + use crate::db::repositories::LibraryRepository; + use crate::db::test_helpers::setup_test_db; + use crate::models::ScanningStrategy; + use crate::tasks::types::TaskType; #[test] fn test_scheduler_can_be_created() { // This test is a placeholder - proper tests require a database connection // See tests/scheduler/mod.rs for integration tests } + + #[tokio::test] + async fn has_active_refresh_for_library_returns_false_when_no_tasks() { + let db = setup_test_db().await; + let library = LibraryRepository::create(&db, "Lib", "/tmp/lib", ScanningStrategy::Default) + .await + .unwrap(); + + let active = has_active_refresh_for_library(&db, library.id) + .await + .unwrap(); + assert!( + !active, + "Fresh DB has no refresh tasks for the library, helper must report false" + ); + } + + #[tokio::test] + async fn has_active_refresh_for_library_detects_pending_task() { + let db = setup_test_db().await; + let library = LibraryRepository::create(&db, "Lib", "/tmp/lib", ScanningStrategy::Default) + .await + .unwrap(); + + TaskRepository::enqueue( + &db, + TaskType::RefreshLibraryMetadata { + library_id: library.id, + }, + None, + ) + .await + .unwrap(); + + let active = has_active_refresh_for_library(&db, library.id) + .await + .unwrap(); + assert!( + active, + "A pending refresh task for the same library must be detected" + ); + } + + #[tokio::test] + async fn has_active_refresh_for_library_is_scoped_per_library() { + let db = setup_test_db().await; + let lib_a = LibraryRepository::create(&db, "A", "/tmp/a", ScanningStrategy::Default) + .await + 
.unwrap(); + let lib_b = LibraryRepository::create(&db, "B", "/tmp/b", ScanningStrategy::Default) + .await + .unwrap(); + + TaskRepository::enqueue( + &db, + TaskType::RefreshLibraryMetadata { + library_id: lib_a.id, + }, + None, + ) + .await + .unwrap(); + + let active_a = has_active_refresh_for_library(&db, lib_a.id).await.unwrap(); + let active_b = has_active_refresh_for_library(&db, lib_b.id).await.unwrap(); + assert!(active_a, "library A has the in-flight task"); + assert!(!active_b, "library B has no in-flight task"); + } } diff --git a/tests/scheduler.rs b/tests/scheduler.rs index d7257a5c..a0a3915d 100644 --- a/tests/scheduler.rs +++ b/tests/scheduler.rs @@ -501,3 +501,262 @@ async fn test_scheduler_timezone_with_deduplication_cron() { scheduler.start().await.expect("Failed to start scheduler"); scheduler.shutdown().await.expect("Failed to shutdown"); } + +// ============================================================================= +// Per-Library Metadata Refresh Schedule Tests (Phase 5) +// ============================================================================= + +use codex::db::repositories::LibraryRepository; +use codex::services::metadata::MetadataRefreshConfig; +use common::create_test_library; + +/// A disabled refresh config (the default) registers no scheduler entry — the +/// scheduler still starts cleanly and no task is enqueued. 
+#[tokio::test] +async fn test_metadata_refresh_disabled_does_not_enqueue() { + let (db, _temp_dir) = setup_test_db().await; + let library = create_test_library(&db, "MyLib", "/tmp/mylib").await; + + // Default config: enabled = false + let cfg = MetadataRefreshConfig::default(); + LibraryRepository::set_metadata_refresh_config(&db, library.id, &cfg) + .await + .expect("Failed to set refresh config"); + + let mut scheduler = Scheduler::new(db.clone(), "UTC") + .await + .expect("Failed to create scheduler"); + scheduler.start().await.expect("Failed to start scheduler"); + + let tasks = TaskRepository::list( + &db, + Some("pending".to_string()), + Some("refresh_library_metadata".to_string()), + Some(100), + ) + .await + .expect("Failed to list tasks"); + + assert_eq!( + tasks.len(), + 0, + "Disabled refresh schedule must not enqueue any task" + ); + + scheduler.shutdown().await.expect("Failed to shutdown"); +} + +/// An enabled config with a valid (far-future) cron registers without panicking +/// and without firing during the test window. +#[tokio::test] +async fn test_metadata_refresh_enabled_loads_without_firing() { + let (db, _temp_dir) = setup_test_db().await; + let library = create_test_library(&db, "MyLib", "/tmp/mylib").await; + + let cfg = MetadataRefreshConfig { + enabled: true, + cron_schedule: "0 0 4 * * *".to_string(), // daily at 04:00, won't fire during test + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library.id, &cfg) + .await + .expect("Failed to set refresh config"); + + let mut scheduler = Scheduler::new(db.clone(), "UTC") + .await + .expect("Failed to create scheduler"); + scheduler.start().await.expect("Failed to start scheduler"); + + // Cron is far in the future, so no task should be enqueued during this test. 
+ let tasks = TaskRepository::list( + &db, + Some("pending".to_string()), + Some("refresh_library_metadata".to_string()), + Some(100), + ) + .await + .expect("Failed to list tasks"); + assert_eq!(tasks.len(), 0); + + scheduler.shutdown().await.expect("Failed to shutdown"); +} + +/// An enabled schedule whose per-library config overrides the timezone is +/// registered successfully (an invalid override falls back to the server +/// timezone without failing the schedule). +#[tokio::test] +async fn test_metadata_refresh_with_per_library_timezone() { + let (db, _temp_dir) = setup_test_db().await; + let library = create_test_library(&db, "MyLib", "/tmp/mylib").await; + + let cfg = MetadataRefreshConfig { + enabled: true, + cron_schedule: "0 0 4 * * *".to_string(), + timezone: Some("Europe/Paris".to_string()), + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library.id, &cfg) + .await + .expect("Failed to set refresh config"); + + let mut scheduler = Scheduler::new(db.clone(), "UTC") + .await + .expect("Failed to create scheduler"); + scheduler.start().await.expect("Failed to start scheduler"); + scheduler.shutdown().await.expect("Failed to shutdown"); +} + +/// Invalid timezone strings on the per-library config fall back to the server +/// timezone and must not fail scheduler startup. 
+#[tokio::test] +async fn test_metadata_refresh_invalid_timezone_falls_back() { + let (db, _temp_dir) = setup_test_db().await; + let library = create_test_library(&db, "MyLib", "/tmp/mylib").await; + + let cfg = MetadataRefreshConfig { + enabled: true, + cron_schedule: "0 0 4 * * *".to_string(), + timezone: Some("Not/A/Timezone".to_string()), + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library.id, &cfg) + .await + .expect("Failed to set refresh config"); + + let mut scheduler = Scheduler::new(db.clone(), "UTC") + .await + .expect("Failed to create scheduler"); + scheduler.start().await.expect("Failed to start scheduler"); + scheduler.shutdown().await.expect("Failed to shutdown"); +} + +/// Invalid cron expressions are surfaced as a per-library warning but do not +/// fail scheduler startup; other libraries' schedules continue to load. +#[tokio::test] +async fn test_metadata_refresh_invalid_cron_does_not_break_startup() { + let (db, _temp_dir) = setup_test_db().await; + let library = create_test_library(&db, "MyLib", "/tmp/mylib").await; + + let cfg = MetadataRefreshConfig { + enabled: true, + cron_schedule: "not a cron".to_string(), + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library.id, &cfg) + .await + .expect("Failed to set refresh config"); + + let mut scheduler = Scheduler::new(db.clone(), "UTC") + .await + .expect("Failed to create scheduler"); + + // Should not error: invalid cron logs a warning and skips just this library. + scheduler.start().await.expect("Failed to start scheduler"); + scheduler.shutdown().await.expect("Failed to shutdown"); +} + +/// `reload_schedules` picks up a config that flipped from disabled → enabled +/// without requiring a server restart. 
+#[tokio::test] +async fn test_metadata_refresh_reload_picks_up_new_config() { + let (db, _temp_dir) = setup_test_db().await; + let library = create_test_library(&db, "MyLib", "/tmp/mylib").await; + + let mut scheduler = Scheduler::new(db.clone(), "UTC") + .await + .expect("Failed to create scheduler"); + scheduler.start().await.expect("Failed to start scheduler"); + + // Flip enabled = true + let cfg = MetadataRefreshConfig { + enabled: true, + cron_schedule: "0 0 4 * * *".to_string(), + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library.id, &cfg) + .await + .expect("Failed to set refresh config"); + + // Reload should rebuild scan + metadata-refresh schedules. + scheduler + .reload_schedules() + .await + .expect("Failed to reload schedules"); + + scheduler.shutdown().await.expect("Failed to shutdown"); +} + +/// Multiple libraries with different refresh configs can coexist on the same +/// scheduler. +#[tokio::test] +async fn test_metadata_refresh_multiple_libraries() { + let (db, _temp_dir) = setup_test_db().await; + let lib_a = create_test_library(&db, "LibA", "/tmp/liba").await; + let lib_b = create_test_library(&db, "LibB", "/tmp/libb").await; + + let cfg_a = MetadataRefreshConfig { + enabled: true, + cron_schedule: "0 0 3 * * *".to_string(), + ..Default::default() + }; + let cfg_b = MetadataRefreshConfig { + enabled: true, + cron_schedule: "0 30 5 * * *".to_string(), + timezone: Some("UTC".to_string()), + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, lib_a.id, &cfg_a) + .await + .expect("Failed to set refresh config for A"); + LibraryRepository::set_metadata_refresh_config(&db, lib_b.id, &cfg_b) + .await + .expect("Failed to set refresh config for B"); + + let mut scheduler = Scheduler::new(db.clone(), "UTC") + .await + .expect("Failed to create scheduler"); + scheduler.start().await.expect("Failed to start scheduler"); + scheduler.shutdown().await.expect("Failed to shutdown"); +} + +/// 
`add_library_metadata_refresh_schedule` is a no-op when the library has no +/// refresh config (column = NULL). +#[tokio::test] +async fn test_add_library_metadata_refresh_schedule_no_config() { + let (db, _temp_dir) = setup_test_db().await; + let library = create_test_library(&db, "MyLib", "/tmp/mylib").await; + + let mut scheduler = Scheduler::new(db.clone(), "UTC") + .await + .expect("Failed to create scheduler"); + scheduler.start().await.expect("Failed to start scheduler"); + + // No config saved; should be a no-op (default = disabled). + scheduler + .add_library_metadata_refresh_schedule(library.id) + .await + .expect("Should succeed silently when refresh disabled"); + + scheduler.shutdown().await.expect("Failed to shutdown"); +} + +/// `add_library_metadata_refresh_schedule` errors clearly when the library +/// does not exist, matching the behavior of `add_library_schedule`. +#[tokio::test] +async fn test_add_library_metadata_refresh_schedule_unknown_library() { + let (db, _temp_dir) = setup_test_db().await; + + let mut scheduler = Scheduler::new(db.clone(), "UTC") + .await + .expect("Failed to create scheduler"); + scheduler.start().await.expect("Failed to start scheduler"); + + let bogus = uuid::Uuid::new_v4(); + let err = scheduler + .add_library_metadata_refresh_schedule(bogus) + .await + .expect_err("Unknown library must error"); + assert!(err.to_string().contains("Library not found")); + + scheduler.shutdown().await.expect("Failed to shutdown"); +} From f2372ccb372b2a27d1a31fd5388b993c883fff8f Mon Sep 17 00:00:00 2001 From: Sylvain Cau Date: Sun, 3 May 2026 13:37:18 -0700 Subject: [PATCH 06/12] feat(api): add metadata-refresh CRUD, run-now, and dry-run endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose the per-library scheduled metadata refresh to HTTP under a new `Metadata Refresh` OpenAPI tag: - GET/PATCH /api/v1/libraries/{id}/metadata-refresh — read and partially update the config; PATCH 
validates cron, timezone, field groups, and providers (disabled plugins accepted, missing rejected) and reloads the scheduler so changes apply without a restart. - POST /api/v1/libraries/{id}/metadata-refresh/run-now — enqueue the task immediately; shares the scheduler's skip-if-already-running guard and returns 409 when a refresh is in flight. - POST /api/v1/libraries/{id}/metadata-refresh/dry-run — synchronous preview bounded by sampleSize (default 5, cap 20) with a per-pair plugin timeout. Plans against an optional configOverride, fetches via metadata/series/get (or match in loose mode), and runs MetadataApplier with dry_run=true so no DB writes happen. - GET /api/v1/metadata-refresh/field-groups — static catalog of the 12 field groups with id, label, and concrete camelCase fields, derived from FieldGroup::all() so adding a variant extends the API automatically. The PATCH DTO uses PatchValue for nullable fields (timezone, per_provider_overrides) so absent leaves the value untouched, explicit null clears, and a value sets. Per-pair plugin failures during dry-run surface as a single skip-row in the sample so the UI can render "this series couldn't be previewed" instead of silently dropping it. Includes integration and unit tests covering happy paths, validation failures, permission gating, the run-now conflict guard, and the field-group catalog shape. 
--- docs/api/openapi.json | 346 ++++++++ src/api/docs.rs | 12 + src/api/routes/v1/dto/metadata_refresh.rs | 205 +++++ src/api/routes/v1/dto/mod.rs | 2 + .../routes/v1/handlers/metadata_refresh.rs | 745 ++++++++++++++++++ src/api/routes/v1/handlers/mod.rs | 1 + src/api/routes/v1/routes/libraries.rs | 17 + src/api/routes/v1/routes/misc.rs | 5 + tests/api.rs | 1 + tests/api/metadata_refresh.rs | 565 +++++++++++++ web/openapi.json | 346 ++++++++ web/src/types/api.generated.ts | 149 ++++ 12 files changed, 2394 insertions(+) create mode 100644 src/api/routes/v1/dto/metadata_refresh.rs create mode 100644 src/api/routes/v1/handlers/metadata_refresh.rs create mode 100644 tests/api/metadata_refresh.rs diff --git a/docs/api/openapi.json b/docs/api/openapi.json index 38c2959b..3ccfb9aa 100644 --- a/docs/api/openapi.json +++ b/docs/api/openapi.json @@ -21707,6 +21707,131 @@ } } }, + "DryRunRequest": { + "type": "object", + "description": "Body for `POST /libraries/{id}/metadata-refresh/dry-run`.\n\n`configOverride` lets the UI preview a config that hasn't been saved yet —\n\"what would happen if I clicked Save right now?\". When absent, the saved\nconfig is used.", + "properties": { + "configOverride": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/MetadataRefreshConfigDto", + "description": "Optional unsaved config to preview. When absent, the library's saved\nconfig is used." + } + ], + "default": null + }, + "sampleSize": { + "type": [ + "integer", + "null" + ], + "format": "int32", + "description": "Number of series to preview. 
Defaults to 5, capped at 20.", + "default": null, + "minimum": 0 + } + } + }, + "DryRunResponse": { + "type": "object", + "description": "Full dry-run response.", + "required": [ + "sample", + "totalEligible", + "estSkippedNoId", + "estSkippedRecentlySynced" + ], + "properties": { + "estSkippedNoId": { + "type": "integer", + "format": "int32", + "description": "Estimated `(series, provider)` pairs the planner skipped because the\nseries has no stored external ID for the provider (strict mode only).", + "minimum": 0 + }, + "estSkippedRecentlySynced": { + "type": "integer", + "format": "int32", + "description": "Estimated pairs skipped because their `last_synced_at` is younger than\nthe recency cutoff.", + "minimum": 0 + }, + "sample": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DryRunSeriesDelta" + }, + "description": "Per-series deltas, capped at the requested sample size." + }, + "totalEligible": { + "type": "integer", + "format": "int32", + "description": "Total number of `(series, provider)` pairs the planner produced before\nthe sample cap.", + "minimum": 0 + }, + "unresolvedProviders": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Provider strings from the config that don't resolve to an enabled\nplugin. Surfaced verbatim so the UI can highlight typos or disabled\nplugins." + } + } + }, + "DryRunSeriesDelta": { + "type": "object", + "description": "One series' would-be deltas in a dry-run preview.", + "required": [ + "seriesId", + "seriesTitle", + "provider", + "changes" + ], + "properties": { + "changes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FieldChangeDto" + }, + "description": "Fields that would be written." + }, + "provider": { + "type": "string", + "description": "Plugin id (`\"plugin:\"`) that produced this delta." 
+ }, + "seriesId": { + "type": "string", + "format": "uuid" + }, + "seriesTitle": { + "type": "string" + }, + "skipped": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DryRunSkippedFieldDto" + }, + "description": "Fields that would be skipped (locked, no permission, etc.)." + } + } + }, + "DryRunSkippedFieldDto": { + "type": "object", + "description": "A field skipped during a dry-run apply, with the reason.", + "required": [ + "field", + "reason" + ], + "properties": { + "field": { + "type": "string" + }, + "reason": { + "type": "string" + } + } + }, "DuplicateGroup": { "type": "object", "description": "A group of duplicate books", @@ -22866,6 +22991,32 @@ } } }, + "FieldGroupDto": { + "type": "object", + "description": "One entry from `GET /api/v1/metadata-refresh/field-groups`.", + "required": [ + "id", + "label", + "fields" + ], + "properties": { + "fields": { + "type": "array", + "items": { + "type": "string" + }, + "description": "camelCase field names this group expands into. Match the\n`should_apply_field` call sites in `MetadataApplier`." + }, + "id": { + "type": "string", + "description": "Snake_case identifier stored in [`MetadataRefreshConfig::field_groups`]." + }, + "label": { + "type": "string", + "description": "Human-readable label for UI display." 
+ } + } + }, "FieldOperator": { "oneOf": [ { @@ -26710,6 +26861,165 @@ } } }, + "MetadataRefreshConfigDto": { + "type": "object", + "description": "Full read response for a library's scheduled metadata-refresh config.\n\nWhen the library has no stored config, the server returns\n[`MetadataRefreshConfig::default`] so clients always render something.", + "required": [ + "enabled", + "cronSchedule", + "fieldGroups", + "extraFields", + "providers", + "existingSourceIdsOnly", + "skipRecentlySyncedWithinS", + "maxConcurrency" + ], + "properties": { + "cronSchedule": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "existingSourceIdsOnly": { + "type": "boolean" + }, + "extraFields": { + "type": "array", + "items": { + "type": "string" + } + }, + "fieldGroups": { + "type": "array", + "items": { + "type": "string" + } + }, + "maxConcurrency": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "perProviderOverrides": { + "type": [ + "object", + "null" + ], + "additionalProperties": { + "$ref": "#/components/schemas/ProviderOverrideDto" + }, + "propertyNames": { + "type": "string" + } + }, + "providers": { + "type": "array", + "items": { + "type": "string" + } + }, + "skipRecentlySyncedWithinS": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "timezone": { + "type": [ + "string", + "null" + ] + } + } + }, + "MetadataRefreshConfigPatchDto": { + "type": "object", + "description": "Partial PATCH body. Uses [`PatchValue`] for nullable fields so clients can\ndistinguish \"leave alone\" from \"explicit clear\".\n\nAll other fields use plain `Option` because clearing a non-nullable\nfield doesn't make sense (e.g. 
you can't \"unset\" `enabled` — you can only\nflip it).", + "properties": { + "cronSchedule": { + "type": [ + "string", + "null" + ], + "default": null + }, + "enabled": { + "type": [ + "boolean", + "null" + ], + "default": null + }, + "existingSourceIdsOnly": { + "type": [ + "boolean", + "null" + ], + "default": null + }, + "extraFields": { + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + }, + "default": null + }, + "fieldGroups": { + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + }, + "default": null + }, + "maxConcurrency": { + "type": [ + "integer", + "null" + ], + "format": "int32", + "default": null, + "minimum": 0 + }, + "perProviderOverrides": { + "type": [ + "object", + "null" + ] + }, + "providers": { + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + }, + "default": null + }, + "skipRecentlySyncedWithinS": { + "type": [ + "integer", + "null" + ], + "format": "int32", + "default": null, + "minimum": 0 + }, + "timezone": { + "type": [ + "string", + "null" + ], + "default": null + } + } + }, "MetricsCleanupResponse": { "type": "object", "description": "Response for cleanup operation", @@ -30384,6 +30694,24 @@ } } }, + "ProviderOverrideDto": { + "type": "object", + "description": "Per-provider override placeholder (Phase 8). 
Mirrors\n[`crate::services::metadata::ProviderOverride`] for the wire format.", + "properties": { + "extraFields": { + "type": "array", + "items": { + "type": "string" + } + }, + "fieldGroups": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, "PublicSettingDto": { "type": "object", "description": "Public setting DTO (for non-admin users)\n\nA simplified setting DTO that only includes the key and value,\nused for public display settings accessible to all authenticated users.", @@ -31764,6 +32092,20 @@ } } }, + "RunNowResponse": { + "type": "object", + "description": "Response for `POST /libraries/{id}/metadata-refresh/run-now`.", + "required": [ + "taskId" + ], + "properties": { + "taskId": { + "type": "string", + "format": "uuid", + "description": "Background task ID. Subscribe to events on `/api/v1/events/stream` to\nfollow progress." + } + } + }, "ScanStatusDto": { "type": "object", "description": "Scan status response", @@ -36967,6 +37309,10 @@ "name": "Plugin Actions", "description": "Plugin action discovery and execution for metadata fetching" }, + { + "name": "Metadata Refresh", + "description": "Per-library scheduled metadata refresh configuration, dry-run preview, and run-now" + }, { "name": "User Plugins", "description": "User-facing plugin management, OAuth, and configuration" diff --git a/src/api/docs.rs b/src/api/docs.rs index f7af0595..07ce8b1b 100644 --- a/src/api/docs.rs +++ b/src/api/docs.rs @@ -889,6 +889,17 @@ The following paths are exempt from rate limiting: v1::dto::EnqueueBulkAutoMatchRequest, v1::dto::EnqueueLibraryAutoMatchRequest, + // Scheduled metadata refresh DTOs (Phase 6) + v1::dto::MetadataRefreshConfigDto, + v1::dto::MetadataRefreshConfigPatchDto, + v1::dto::ProviderOverrideDto, + v1::dto::RunNowResponse, + v1::dto::DryRunRequest, + v1::dto::DryRunResponse, + v1::dto::DryRunSeriesDelta, + v1::dto::DryRunSkippedFieldDto, + v1::dto::FieldGroupDto, + // Task Queue DTOs v1::handlers::task_queue::CreateTaskRequest, 
v1::handlers::task_queue::CreateTaskResponse, @@ -1010,6 +1021,7 @@ The following paths are exempt from rate limiting: (name = "Settings", description = "Runtime configuration settings (admin only)"), (name = "Plugins", description = "Admin-managed external plugin processes"), (name = "Plugin Actions", description = "Plugin action discovery and execution for metadata fetching"), + (name = "Metadata Refresh", description = "Per-library scheduled metadata refresh configuration, dry-run preview, and run-now"), (name = "User Plugins", description = "User-facing plugin management, OAuth, and configuration"), (name = "Recommendations", description = "Personalized recommendation endpoints"), (name = "Metrics", description = "Application metrics and statistics"), diff --git a/src/api/routes/v1/dto/metadata_refresh.rs b/src/api/routes/v1/dto/metadata_refresh.rs new file mode 100644 index 00000000..59bc6b75 --- /dev/null +++ b/src/api/routes/v1/dto/metadata_refresh.rs @@ -0,0 +1,205 @@ +//! DTOs for the scheduled metadata-refresh API (Phase 6). +//! +//! Wraps [`MetadataRefreshConfig`] for the wire protocol, plus dedicated +//! request/response types for run-now, dry-run, and field-group enumeration. +//! +//! [`MetadataRefreshConfig`]: crate::services::metadata::MetadataRefreshConfig + +use crate::services::metadata::{MetadataRefreshConfig, ProviderOverride}; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; +use utoipa::ToSchema; +use uuid::Uuid; + +use super::patch::PatchValue; +use super::plugins::FieldChangeDto; + +// --------------------------------------------------------------------------- +// Config CRUD +// --------------------------------------------------------------------------- + +/// Per-provider override placeholder (Phase 8). Mirrors +/// [`crate::services::metadata::ProviderOverride`] for the wire format. 
+#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct ProviderOverrideDto {
+    /// Field-group identifiers for this provider. A missing key deserializes
+    /// to an empty list.
+    #[serde(default)]
+    pub field_groups: Vec<String>,
+    /// Extra field names for this provider. A missing key deserializes to an
+    /// empty list.
+    #[serde(default)]
+    pub extra_fields: Vec<String>,
+}
+
+/// Service type -> wire type; both lists are moved across unchanged.
+impl From<ProviderOverride> for ProviderOverrideDto {
+    fn from(o: ProviderOverride) -> Self {
+        Self {
+            field_groups: o.field_groups,
+            extra_fields: o.extra_fields,
+        }
+    }
+}
+
+/// Wire type -> service type; both lists are moved across unchanged. No
+/// validation happens in the conversion itself.
+impl From<ProviderOverrideDto> for ProviderOverride {
+    fn from(o: ProviderOverrideDto) -> Self {
+        Self {
+            field_groups: o.field_groups,
+            extra_fields: o.extra_fields,
+        }
+    }
+}
+
+/// Full read response for a library's scheduled metadata-refresh config.
+///
+/// When the library has no stored config, the server returns
+/// [`MetadataRefreshConfig::default`] so clients always render something.
+///
+/// Field names are serialized in camelCase (`cronSchedule`, `fieldGroups`, ...).
+#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct MetadataRefreshConfigDto {
+    pub enabled: bool,
+    pub cron_schedule: String,
+    /// Omitted from the serialized JSON when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub timezone: Option<String>,
+    pub field_groups: Vec<String>,
+    pub extra_fields: Vec<String>,
+    pub providers: Vec<String>,
+    pub existing_source_ids_only: bool,
+    pub skip_recently_synced_within_s: u32,
+    pub max_concurrency: u8,
+    /// Keyed by provider string. Omitted from the serialized JSON when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub per_provider_overrides: Option<BTreeMap<String, ProviderOverrideDto>>,
+}
+
+/// Service config -> wire DTO. Every field is moved across as-is; the
+/// override map has each value converted with
+/// `ProviderOverride -> ProviderOverrideDto`.
+impl From<MetadataRefreshConfig> for MetadataRefreshConfigDto {
+    fn from(c: MetadataRefreshConfig) -> Self {
+        Self {
+            enabled: c.enabled,
+            cron_schedule: c.cron_schedule,
+            timezone: c.timezone,
+            field_groups: c.field_groups,
+            extra_fields: c.extra_fields,
+            providers: c.providers,
+            existing_source_ids_only: c.existing_source_ids_only,
+            skip_recently_synced_within_s: c.skip_recently_synced_within_s,
+            max_concurrency: c.max_concurrency,
+            per_provider_overrides: c
+                .per_provider_overrides
+                .map(|m| m.into_iter().map(|(k, v)| (k, v.into())).collect()),
+        }
+    }
+}
+
+/// Partial PATCH body. Uses [`PatchValue`] for nullable fields so clients can
+/// distinguish "leave alone" from "explicit clear". 
+/// +/// All other fields use plain `Option` because clearing a non-nullable +/// field doesn't make sense (e.g. you can't "unset" `enabled` — you can only +/// flip it). +#[derive(Debug, Clone, Default, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase", default)] +pub struct MetadataRefreshConfigPatchDto { + pub enabled: Option, + pub cron_schedule: Option, + #[serde(default)] + #[schema(value_type = Option)] + pub timezone: PatchValue, + pub field_groups: Option>, + pub extra_fields: Option>, + pub providers: Option>, + pub existing_source_ids_only: Option, + pub skip_recently_synced_within_s: Option, + pub max_concurrency: Option, + #[serde(default)] + #[schema(value_type = Option)] + pub per_provider_overrides: PatchValue>, +} + +// --------------------------------------------------------------------------- +// Run now / dry run +// --------------------------------------------------------------------------- + +/// Response for `POST /libraries/{id}/metadata-refresh/run-now`. +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct RunNowResponse { + /// Background task ID. Subscribe to events on `/api/v1/events/stream` to + /// follow progress. + pub task_id: Uuid, +} + +/// Body for `POST /libraries/{id}/metadata-refresh/dry-run`. +/// +/// `configOverride` lets the UI preview a config that hasn't been saved yet — +/// "what would happen if I clicked Save right now?". When absent, the saved +/// config is used. +#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase", default)] +pub struct DryRunRequest { + /// Number of series to preview. Defaults to 5, capped at 20. + #[serde(skip_serializing_if = "Option::is_none")] + pub sample_size: Option, + /// Optional unsaved config to preview. When absent, the library's saved + /// config is used. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub config_override: Option, +} + +/// One series' would-be deltas in a dry-run preview. +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct DryRunSeriesDelta { + pub series_id: Uuid, + pub series_title: String, + /// Plugin id (`"plugin:"`) that produced this delta. + pub provider: String, + /// Fields that would be written. + pub changes: Vec, + /// Fields that would be skipped (locked, no permission, etc.). + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub skipped: Vec, +} + +/// A field skipped during a dry-run apply, with the reason. +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct DryRunSkippedFieldDto { + pub field: String, + pub reason: String, +} + +/// Full dry-run response. +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct DryRunResponse { + /// Per-series deltas, capped at the requested sample size. + pub sample: Vec, + /// Total number of `(series, provider)` pairs the planner produced before + /// the sample cap. + pub total_eligible: u32, + /// Estimated `(series, provider)` pairs the planner skipped because the + /// series has no stored external ID for the provider (strict mode only). + pub est_skipped_no_id: u32, + /// Estimated pairs skipped because their `last_synced_at` is younger than + /// the recency cutoff. + pub est_skipped_recently_synced: u32, + /// Provider strings from the config that don't resolve to an enabled + /// plugin. Surfaced verbatim so the UI can highlight typos or disabled + /// plugins. 
+ #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub unresolved_providers: Vec, +} + +// --------------------------------------------------------------------------- +// Field group enumeration +// --------------------------------------------------------------------------- + +/// One entry from `GET /api/v1/metadata-refresh/field-groups`. +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct FieldGroupDto { + /// Snake_case identifier stored in [`MetadataRefreshConfig::field_groups`]. + pub id: String, + /// Human-readable label for UI display. + pub label: String, + /// camelCase field names this group expands into. Match the + /// `should_apply_field` call sites in `MetadataApplier`. + pub fields: Vec, +} diff --git a/src/api/routes/v1/dto/mod.rs b/src/api/routes/v1/dto/mod.rs index f65f1bed..6e78d2fa 100644 --- a/src/api/routes/v1/dto/mod.rs +++ b/src/api/routes/v1/dto/mod.rs @@ -12,6 +12,7 @@ pub mod duplicates; pub mod filter; pub mod info; pub mod library; +pub mod metadata_refresh; pub mod metrics; pub mod oidc; pub mod page; @@ -43,6 +44,7 @@ pub use duplicates::*; pub use filter::*; pub use info::*; pub use library::*; +pub use metadata_refresh::*; pub use metrics::*; pub use oidc::*; pub use page::*; diff --git a/src/api/routes/v1/handlers/metadata_refresh.rs b/src/api/routes/v1/handlers/metadata_refresh.rs new file mode 100644 index 00000000..f8bbd703 --- /dev/null +++ b/src/api/routes/v1/handlers/metadata_refresh.rs @@ -0,0 +1,745 @@ +//! HTTP handlers for the scheduled metadata-refresh feature (Phase 6). +//! +//! Endpoints: +//! - `GET /api/v1/libraries/{id}/metadata-refresh` — read config +//! - `PATCH /api/v1/libraries/{id}/metadata-refresh` — partial update +//! - `POST /api/v1/libraries/{id}/metadata-refresh/run-now` — enqueue task +//! - `POST /api/v1/libraries/{id}/metadata-refresh/dry-run` — preview +//! - `GET /api/v1/metadata-refresh/field-groups` — group catalog +//! +//! 
Permissions: read endpoints require `libraries:read`; mutating and +//! action endpoints require `libraries:write`. + +use crate::api::{AppState, error::ApiError, extractors::AuthContext, permissions::Permission}; +use crate::db::repositories::{ + LibraryRepository, PluginsRepository, SeriesMetadataRepository, SeriesRepository, + TaskRepository, +}; +use crate::services::metadata::{ + ApplyOptions, FieldGroup, MatchingStrategy, MetadataApplier, MetadataRefreshConfig, + PlannedRefresh, ProviderOverride, RefreshPlan, RefreshPlanner, fields_filter_from_config, + fields_for_group, +}; +use crate::services::plugin::protocol::{MetadataGetParams, MetadataMatchParams}; +use crate::tasks::types::TaskType; +use crate::utils::cron::{validate_cron_expression, validate_timezone}; + +use super::super::dto::{ + DryRunRequest, DryRunResponse, DryRunSeriesDelta, DryRunSkippedFieldDto, FieldChangeDto, + FieldGroupDto, MetadataRefreshConfigDto, MetadataRefreshConfigPatchDto, RunNowResponse, +}; + +use axum::{ + Json, + extract::{Path, State}, +}; +use sea_orm::DatabaseConnection; +use std::collections::BTreeMap; +use std::str::FromStr; +use std::sync::Arc; +use std::time::Duration; +use uuid::Uuid; + +/// Per-pair timeout when fetching metadata for the dry-run preview. Kept +/// short because the user is waiting on the HTTP response — a slow plugin +/// shouldn't block the modal indefinitely. +const DRY_RUN_PER_PAIR_TIMEOUT: Duration = Duration::from_secs(20); + +/// Default sample size when the request omits `sample_size`. Mirrors the +/// plan's "show me 5 series" UX. +const DEFAULT_SAMPLE_SIZE: u32 = 5; + +/// Hard cap on `sample_size`. Prevents the dry-run from doing the full task +/// over HTTP. +const MAX_SAMPLE_SIZE: u32 = 20; + +/// Friendly label for a [`FieldGroup`] used in the public field-group catalog. 
+fn field_group_label(group: FieldGroup) -> &'static str {
+    // No wildcard arm: adding a `FieldGroup` variant fails to compile until
+    // it is given a label here.
+    match group {
+        FieldGroup::Identifiers => "Identifiers",
+        FieldGroup::Descriptive => "Descriptive",
+        FieldGroup::Status => "Status",
+        FieldGroup::Counts => "Counts",
+        FieldGroup::Ratings => "Ratings",
+        FieldGroup::Cover => "Cover",
+        FieldGroup::Tags => "Tags",
+        FieldGroup::Genres => "Genres",
+        FieldGroup::AgeRating => "Age Rating",
+        FieldGroup::Classification => "Classification",
+        FieldGroup::Publisher => "Publisher",
+        FieldGroup::ExternalRefs => "External References",
+    }
+}
+
+/// `GET /api/v1/libraries/{library_id}/metadata-refresh`
+///
+/// Requires `Permission::LibrariesRead`. Fails with `ApiError::NotFound` when
+/// the library does not exist and `ApiError::Internal` when the repository
+/// read fails.
+#[utoipa::path(
+    get,
+    path = "/api/v1/libraries/{library_id}/metadata-refresh",
+    params(
+        ("library_id" = Uuid, Path, description = "Library ID"),
+    ),
+    responses(
+        (status = 200, description = "Current scheduled refresh config (defaults if none persisted)", body = MetadataRefreshConfigDto),
+        (status = 403, description = "Forbidden"),
+        (status = 404, description = "Library not found"),
+    ),
+    security(
+        ("jwt_bearer" = []),
+        ("api_key" = []),
+    ),
+    tag = "Metadata Refresh",
+)]
+pub async fn get_refresh_config(
+    State(state): State<Arc<AppState>>,
+    auth: AuthContext,
+    Path(library_id): Path<Uuid>,
+) -> Result<Json<MetadataRefreshConfigDto>, ApiError> {
+    // Permission is checked before the existence lookup.
+    auth.require_permission(&Permission::LibrariesRead)?;
+    ensure_library_exists(&state.db, library_id).await?;
+
+    let cfg = LibraryRepository::get_metadata_refresh_config(&state.db, library_id)
+        .await
+        .map_err(|e| ApiError::Internal(format!("Failed to load refresh config: {e}")))?;
+
+    Ok(Json(cfg.into()))
+}
+
+/// `PATCH /api/v1/libraries/{library_id}/metadata-refresh`
+///
+/// Loads the current config, applies the patch with validation, persists the
+/// result, then asks the scheduler (if one is attached to the app state) to
+/// reload. Returns the config as persisted.
+///
+/// Requires `Permission::LibrariesWrite`. A validation failure returns before
+/// anything is written.
+#[utoipa::path(
+    patch,
+    path = "/api/v1/libraries/{library_id}/metadata-refresh",
+    params(
+        ("library_id" = Uuid, Path, description = "Library ID"),
+    ),
+    request_body = MetadataRefreshConfigPatchDto,
+    responses(
+        (status = 200, description = "Updated config", body = MetadataRefreshConfigDto),
+        (status = 400, description = "Invalid cron, timezone, field group, or provider"),
+        (status = 403, description = "Forbidden"),
+        (status = 404, description = "Library not found"),
+    ),
+    security(
+        ("jwt_bearer" = []),
+        ("api_key" = []),
+    ),
+    tag = "Metadata Refresh",
+)]
+pub async fn patch_refresh_config(
+    State(state): State<Arc<AppState>>,
+    auth: AuthContext,
+    Path(library_id): Path<Uuid>,
+    Json(patch): Json<MetadataRefreshConfigPatchDto>,
+) -> Result<Json<MetadataRefreshConfigDto>, ApiError> {
+    auth.require_permission(&Permission::LibrariesWrite)?;
+    ensure_library_exists(&state.db, library_id).await?;
+
+    let mut cfg = LibraryRepository::get_metadata_refresh_config(&state.db, library_id)
+        .await
+        .map_err(|e| ApiError::Internal(format!("Failed to load refresh config: {e}")))?;
+
+    // Mutates `cfg` in place; on `Err` we bail out via `?` so the partially
+    // patched value is never persisted.
+    apply_patch_with_validation(&mut cfg, patch, &state.db).await?;
+
+    LibraryRepository::set_metadata_refresh_config(&state.db, library_id, &cfg)
+        .await
+        .map_err(|e| ApiError::Internal(format!("Failed to persist refresh config: {e}")))?;
+
+    // Reload the scheduler so the change picks up immediately. Best-effort —
+    // a failure here doesn't undo the config write because the next reload
+    // (or restart) will pick it up.
+    if let Some(scheduler) = &state.scheduler
+        && let Err(e) = scheduler.lock().await.reload_schedules().await
+    {
+        tracing::warn!(
+            "Failed to reload scheduler after metadata-refresh config update: {}",
+            e
+        );
+    }
+
+    Ok(Json(cfg.into()))
+}
+
+/// `POST /api/v1/libraries/{library_id}/metadata-refresh/run-now`
+///
+/// Enqueues a `TaskType::RefreshLibraryMetadata` task for the library and
+/// returns its id. Requires `Permission::LibrariesWrite`. Returns
+/// `ApiError::Conflict` when `has_active_refresh_task` reports a task for
+/// this library.
+#[utoipa::path(
+    post,
+    path = "/api/v1/libraries/{library_id}/metadata-refresh/run-now",
+    params(
+        ("library_id" = Uuid, Path, description = "Library ID"),
+    ),
+    responses(
+        (status = 200, description = "Task enqueued", body = RunNowResponse),
+        (status = 403, description = "Forbidden"),
+        (status = 404, description = "Library not found"),
+        (status = 409, description = "A refresh task is already running"),
+    ),
+    security(
+        ("jwt_bearer" = []),
+        ("api_key" = []),
+    ),
+    tag = "Metadata Refresh",
+)]
+pub async fn run_refresh_now(
+    State(state): State<Arc<AppState>>,
+    auth: AuthContext,
+    Path(library_id): Path<Uuid>,
+) -> Result<Json<RunNowResponse>, ApiError> {
+    auth.require_permission(&Permission::LibrariesWrite)?;
+    ensure_library_exists(&state.db, library_id).await?;
+
+    // NOTE(review): the guard and the enqueue below are two separate awaits
+    // with no visible transaction around them, so two concurrent requests
+    // could both pass the check — confirm whether the task queue dedupes.
+    if has_active_refresh_task(&state.db, library_id).await? {
+        return Err(ApiError::Conflict(format!(
+            "A metadata refresh is already in progress for library {library_id}"
+        )));
+    }
+
+    let task_id = TaskRepository::enqueue(
+        &state.db,
+        TaskType::RefreshLibraryMetadata { library_id },
+        None,
+    )
+    .await
+    .map_err(|e| ApiError::Internal(format!("Failed to enqueue refresh task: {e}")))?;
+
+    Ok(Json(RunNowResponse { task_id }))
+}
+
+/// `POST /api/v1/libraries/{library_id}/metadata-refresh/dry-run`
+///
+/// Synchronous over HTTP. Bounded by `sampleSize` (default 5, capped at 20). 
+#[utoipa::path(
+    post,
+    path = "/api/v1/libraries/{library_id}/metadata-refresh/dry-run",
+    params(
+        ("library_id" = Uuid, Path, description = "Library ID"),
+    ),
+    request_body = DryRunRequest,
+    responses(
+        (status = 200, description = "Sample of would-be changes", body = DryRunResponse),
+        (status = 400, description = "Invalid override config"),
+        (status = 403, description = "Forbidden"),
+        (status = 404, description = "Library not found"),
+    ),
+    security(
+        ("jwt_bearer" = []),
+        ("api_key" = []),
+    ),
+    tag = "Metadata Refresh",
+)]
+pub async fn dry_run_refresh(
+    State(state): State<Arc<AppState>>,
+    auth: AuthContext,
+    Path(library_id): Path<Uuid>,
+    Json(req): Json<DryRunRequest>,
+) -> Result<Json<DryRunResponse>, ApiError> {
+    auth.require_permission(&Permission::LibrariesWrite)?;
+    ensure_library_exists(&state.db, library_id).await?;
+
+    // An override is validated and laid over `MetadataRefreshConfig::default()`,
+    // not over the library's saved config; without one the saved config is used.
+    let config = if let Some(override_dto) = req.config_override {
+        let mut cfg = MetadataRefreshConfig::default();
+        apply_override_dto(&mut cfg, override_dto, &state.db).await?;
+        cfg
+    } else {
+        LibraryRepository::get_metadata_refresh_config(&state.db, library_id)
+            .await
+            .map_err(|e| ApiError::Internal(format!("Failed to load refresh config: {e}")))?
+    };
+
+    // Missing -> DEFAULT_SAMPLE_SIZE; anything outside 1..=MAX_SAMPLE_SIZE is
+    // pulled into range (so an explicit 0 previews one pair).
+    let sample_size = req
+        .sample_size
+        .unwrap_or(DEFAULT_SAMPLE_SIZE)
+        .clamp(1, MAX_SAMPLE_SIZE);
+
+    let plan = RefreshPlanner::plan(&state.db, library_id, &config)
+        .await
+        .map_err(|e| ApiError::Internal(format!("Failed to plan refresh: {e}")))?;
+
+    // Totals are computed from the whole plan, before the sample cap.
+    let totals = plan_totals(&plan);
+    let fields_filter = fields_filter_from_config(&config);
+
+    let matching_strategy = if config.existing_source_ids_only {
+        MatchingStrategy::ExistingExternalIdOnly
+    } else {
+        MatchingStrategy::AllowReMatch
+    };
+
+    // The sample is simply the first `sample_size` planned pairs, in plan order.
+    let sample_pairs: Vec<&PlannedRefresh> =
+        plan.planned.iter().take(sample_size as usize).collect();
+
+    // Pairs are previewed one after another, not concurrently.
+    let mut sample = Vec::with_capacity(sample_pairs.len());
+    for planned in sample_pairs {
+        match dry_run_one_pair(
+            &state.db,
+            library_id,
+            planned,
+            fields_filter.as_ref(),
+            matching_strategy,
+            &state,
+        )
+        .await
+        {
+            Ok(delta) => sample.push(delta),
+            Err(e) => {
+                tracing::warn!(
+                    "Dry-run preview failed for series {} / plugin {}: {:#}",
+                    planned.series_id,
+                    planned.plugin.name,
+                    e
+                );
+                // Surface the failure as a single-skip row so the UI can
+                // show "this series couldn't be previewed" instead of
+                // silently dropping it.
+                sample.push(DryRunSeriesDelta {
+                    series_id: planned.series_id,
+                    series_title: lookup_series_title(&state.db, planned.series_id).await,
+                    provider: format!("plugin:{}", planned.plugin.name),
+                    changes: Vec::new(),
+                    skipped: vec![DryRunSkippedFieldDto {
+                        field: "_preview".to_string(),
+                        reason: format!("Preview failed: {e}"),
+                    }],
+                });
+            }
+        }
+    }
+
+    Ok(Json(DryRunResponse {
+        sample,
+        total_eligible: totals.total_eligible,
+        est_skipped_no_id: totals.skipped_no_id,
+        est_skipped_recently_synced: totals.skipped_recently_synced,
+        unresolved_providers: plan.unresolved_providers,
+    }))
+}
+
+/// `GET /api/v1/metadata-refresh/field-groups`
+///
+/// Returns one entry per `FieldGroup::all()` element, in that order, with the
+/// group's id, display label, and field names. Requires
+/// `Permission::LibrariesRead`; touches no database state.
+#[utoipa::path(
+    get,
+    path = "/api/v1/metadata-refresh/field-groups",
+    responses(
+        (status = 200, description = "All field groups in display order", body = [FieldGroupDto]),
+        (status = 403, description = "Forbidden"),
+    ),
+    security(
+        ("jwt_bearer" = []),
+        ("api_key" = []),
+    ),
+    tag = "Metadata Refresh",
+)]
+pub async fn list_field_groups(auth: AuthContext) -> Result<Json<Vec<FieldGroupDto>>, ApiError> {
+    auth.require_permission(&Permission::LibrariesRead)?;
+
+    let out: Vec<FieldGroupDto> = FieldGroup::all()
+        .iter()
+        .map(|g| FieldGroupDto {
+            id: g.as_str().to_string(),
+            label: field_group_label(*g).to_string(),
+            fields: fields_for_group(*g)
+                .iter()
+                .map(|s| (*s).to_string())
+                .collect(),
+        })
+        .collect();
+
+    Ok(Json(out))
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/// Resolve to `Ok(())` when the library row exists.
+///
+/// A repository error becomes `ApiError::Internal`; a missing row becomes
+/// `ApiError::NotFound`. The loaded library itself is discarded.
+async fn ensure_library_exists(db: &DatabaseConnection, library_id: Uuid) -> Result<(), ApiError> {
+    LibraryRepository::get_by_id(db, library_id)
+        .await
+        .map_err(|e| ApiError::Internal(format!("Failed to load library: {e}")))?
+        .ok_or_else(|| ApiError::NotFound(format!("Library {library_id} not found")))
+        .map(|_| ())
+}
+
+/// Detect whether a `RefreshLibraryMetadata` task for this library is still
+/// in flight. 
Mirrors the scheduler's skip-if-already-running guard so the
+/// HTTP "run now" path doesn't pile work onto an existing run.
+///
+/// Returns `Ok(true)` when at least one task row has type
+/// `refresh_library_metadata`, this `library_id`, and a status of `pending`
+/// or `processing`. A query failure is reported as `ApiError::Internal`.
+async fn has_active_refresh_task(
+    db: &DatabaseConnection,
+    library_id: Uuid,
+) -> Result<bool, ApiError> {
+    use crate::db::entities::{prelude::Tasks, tasks};
+    use sea_orm::{ColumnTrait, EntityTrait, PaginatorTrait, QueryFilter};
+
+    // Queued tasks count as "in flight" too, not only ones being processed.
+    let count = Tasks::find()
+        .filter(tasks::Column::TaskType.eq("refresh_library_metadata"))
+        .filter(tasks::Column::LibraryId.eq(library_id))
+        .filter(tasks::Column::Status.is_in(["pending", "processing"]))
+        .count(db)
+        .await
+        .map_err(|e| ApiError::Internal(format!("Failed to check active refresh tasks: {e}")))?;
+
+    Ok(count > 0)
+}
+
+/// Apply a PATCH to the in-memory config, validating each field as we go.
+///
+/// Validation strategy:
+/// - cron: parse via `validate_cron_expression` (rejects nonsense up front).
+///   Stores the *original* (pre-normalization) string so the user sees what
+///   they typed; the scheduler normalizes again at register time.
+/// - timezone: IANA name check via `validate_timezone`.
+/// - field_groups / extra_fields: every group string must parse to a known
+///   `FieldGroup`. Extras pass through (power-user hatch).
+/// - providers: each `"plugin:<name>"` must resolve to an existing plugin.
+///   Disabled plugins are accepted (so the user can persist a "wait for
+///   admin to enable this" state) but missing plugins are rejected.
+/// - per_provider_overrides: same group validation; unknown groups rejected. 
+async fn apply_patch_with_validation( + cfg: &mut MetadataRefreshConfig, + patch: MetadataRefreshConfigPatchDto, + db: &DatabaseConnection, +) -> Result<(), ApiError> { + if let Some(v) = patch.enabled { + cfg.enabled = v; + } + if let Some(v) = patch.cron_schedule { + validate_cron_expression(&v) + .map_err(|e| ApiError::BadRequest(format!("Invalid cron schedule: {e}")))?; + cfg.cron_schedule = v; + } + if let Some(maybe_tz) = patch.timezone.into_nested_option() { + match maybe_tz { + None => cfg.timezone = None, + Some(tz) => { + validate_timezone(&tz) + .map_err(|e| ApiError::BadRequest(format!("Invalid timezone: {e}")))?; + cfg.timezone = Some(tz); + } + } + } + if let Some(groups) = patch.field_groups { + validate_field_groups(&groups)?; + cfg.field_groups = groups; + } + if let Some(extras) = patch.extra_fields { + cfg.extra_fields = extras; + } + if let Some(providers) = patch.providers { + validate_providers(db, &providers).await?; + cfg.providers = providers; + } + if let Some(v) = patch.existing_source_ids_only { + cfg.existing_source_ids_only = v; + } + if let Some(v) = patch.skip_recently_synced_within_s { + cfg.skip_recently_synced_within_s = v; + } + if let Some(v) = patch.max_concurrency { + if v == 0 { + return Err(ApiError::BadRequest( + "max_concurrency must be at least 1".to_string(), + )); + } + cfg.max_concurrency = v; + } + if let Some(maybe_overrides) = patch.per_provider_overrides.into_nested_option() { + match maybe_overrides { + None => cfg.per_provider_overrides = None, + Some(map) => { + for (provider, ovr) in &map { + validate_field_groups(&ovr.field_groups).map_err(|e| match e { + ApiError::BadRequest(msg) => ApiError::BadRequest(format!( + "Invalid override for provider '{provider}': {msg}" + )), + other => other, + })?; + } + let mut converted: BTreeMap = BTreeMap::new(); + for (k, v) in map { + converted.insert(k, v.into()); + } + cfg.per_provider_overrides = Some(converted); + } + } + } + Ok(()) +} + +/// Apply a complete config 
DTO (used by dry-run's `configOverride`) onto a +/// fresh default config, with the same validation as PATCH. +async fn apply_override_dto( + cfg: &mut MetadataRefreshConfig, + dto: MetadataRefreshConfigDto, + db: &DatabaseConnection, +) -> Result<(), ApiError> { + cfg.enabled = dto.enabled; + validate_cron_expression(&dto.cron_schedule) + .map_err(|e| ApiError::BadRequest(format!("Invalid cron schedule: {e}")))?; + cfg.cron_schedule = dto.cron_schedule; + + if let Some(tz) = dto.timezone { + validate_timezone(&tz) + .map_err(|e| ApiError::BadRequest(format!("Invalid timezone: {e}")))?; + cfg.timezone = Some(tz); + } + validate_field_groups(&dto.field_groups)?; + cfg.field_groups = dto.field_groups; + cfg.extra_fields = dto.extra_fields; + validate_providers(db, &dto.providers).await?; + cfg.providers = dto.providers; + cfg.existing_source_ids_only = dto.existing_source_ids_only; + cfg.skip_recently_synced_within_s = dto.skip_recently_synced_within_s; + if dto.max_concurrency == 0 { + return Err(ApiError::BadRequest( + "max_concurrency must be at least 1".to_string(), + )); + } + cfg.max_concurrency = dto.max_concurrency; + + if let Some(map) = dto.per_provider_overrides { + for (provider, ovr) in &map { + validate_field_groups(&ovr.field_groups).map_err(|e| match e { + ApiError::BadRequest(msg) => ApiError::BadRequest(format!( + "Invalid override for provider '{provider}': {msg}" + )), + other => other, + })?; + } + let mut converted: BTreeMap = BTreeMap::new(); + for (k, v) in map { + converted.insert(k, v.into()); + } + cfg.per_provider_overrides = Some(converted); + } + Ok(()) +} + +fn validate_field_groups(groups: &[String]) -> Result<(), ApiError> { + for g in groups { + FieldGroup::from_str(g) + .map_err(|_| ApiError::BadRequest(format!("Unknown field group '{g}'")))?; + } + Ok(()) +} + +/// Each provider must be `"plugin:"` and point to an installed plugin. 
+/// Disabled plugins are allowed: the user may persist them in anticipation +/// of enabling later, and the planner already records them as +/// `unresolved_providers` at run time. +async fn validate_providers(db: &DatabaseConnection, providers: &[String]) -> Result<(), ApiError> { + for p in providers { + let Some(name) = p.strip_prefix("plugin:") else { + return Err(ApiError::BadRequest(format!( + "Invalid provider '{p}': expected 'plugin:'" + ))); + }; + let plugin = PluginsRepository::get_by_name(db, name) + .await + .map_err(|e| ApiError::Internal(format!("Failed to look up plugin '{name}': {e}")))?; + if plugin.is_none() { + return Err(ApiError::BadRequest(format!( + "Unknown plugin '{name}' in provider '{p}'" + ))); + } + } + Ok(()) +} + +struct PlanTotals { + total_eligible: u32, + skipped_no_id: u32, + skipped_recently_synced: u32, +} + +fn plan_totals(plan: &RefreshPlan) -> PlanTotals { + let counts = plan.skipped_by_reason(); + PlanTotals { + total_eligible: plan.planned.len() as u32, + skipped_no_id: counts.get("no_external_id").copied().unwrap_or(0) as u32, + skipped_recently_synced: counts.get("recently_synced").copied().unwrap_or(0) as u32, + } +} + +/// Lookup a series' display name. Best-effort; falls back to the UUID string +/// so the dry-run response stays well-formed even if the row was deleted +/// between planning and rendering. +async fn lookup_series_title(db: &DatabaseConnection, series_id: Uuid) -> String { + match SeriesRepository::get_by_id(db, series_id).await { + Ok(Some(s)) => { + // Prefer the metadata title (canonical display name) when present. + match SeriesMetadataRepository::get_by_series_id(db, series_id).await { + Ok(Some(meta)) if !meta.title.is_empty() => meta.title, + _ => s.name, + } + } + _ => series_id.to_string(), + } +} + +/// Fetch metadata for one planned pair, run a dry-run apply, and convert the +/// result to a [`DryRunSeriesDelta`]. +/// +/// Re-uses the task handler's matching semantics (strict vs. 
loose) so the +/// preview reflects what a real run would do. +async fn dry_run_one_pair( + db: &DatabaseConnection, + library_id: Uuid, + planned: &PlannedRefresh, + fields_filter: Option<&std::collections::HashSet>, + matching_strategy: MatchingStrategy, + state: &AppState, +) -> Result { + let plugin = &planned.plugin; + + // 1. External ID resolution mirrors the task handler's process_pair. + let external_id = if let Some(record) = planned.existing_external_id.as_ref() { + record.external_id.clone() + } else { + match matching_strategy { + MatchingStrategy::ExistingExternalIdOnly => { + anyhow::bail!("No stored external ID for series; strict mode") + } + MatchingStrategy::AllowReMatch => rematch_for_dry_run(db, planned, state).await?, + } + }; + + let get_params = MetadataGetParams { + external_id: external_id.clone(), + }; + let plugin_metadata = tokio::time::timeout( + DRY_RUN_PER_PAIR_TIMEOUT, + state + .plugin_manager + .get_series_metadata(plugin.id, get_params), + ) + .await + .map_err(|_| anyhow::anyhow!("Plugin call timed out"))? + .map_err(|e| anyhow::anyhow!("Plugin call failed: {e}"))?; + + let current_metadata = SeriesMetadataRepository::get_by_series_id(db, planned.series_id) + .await + .map_err(|e| anyhow::anyhow!("Failed to load current metadata: {e:#}"))?; + + let options = ApplyOptions { + fields_filter: fields_filter.cloned(), + thumbnail_service: Some(state.thumbnail_service.clone()), + event_broadcaster: None, // Don't emit events for previews. 
+ dry_run: true, + matching_strategy, + }; + + let result = MetadataApplier::apply( + db, + planned.series_id, + library_id, + plugin, + &plugin_metadata, + current_metadata.as_ref(), + &options, + ) + .await + .map_err(|e| anyhow::anyhow!("Dry-run apply failed: {e:#}"))?; + + let changes: Vec = result + .dry_run_report + .map(|r| { + r.changes + .into_iter() + .map(|c| FieldChangeDto { + field: c.field, + before: c.before, + after: c.after, + }) + .collect() + }) + .unwrap_or_default(); + + let skipped: Vec = result + .skipped_fields + .into_iter() + .map(|sf| DryRunSkippedFieldDto { + field: sf.field, + reason: sf.reason, + }) + .collect(); + + Ok(DryRunSeriesDelta { + series_id: planned.series_id, + series_title: lookup_series_title(db, planned.series_id).await, + provider: format!("plugin:{}", plugin.name), + changes, + skipped, + }) +} + +/// Loose-mode rematch helper for dry-run. Mirrors `process_pair`'s rematch +/// path but stops at returning the candidate external ID — the dry-run +/// caller does the rest. +async fn rematch_for_dry_run( + db: &DatabaseConnection, + planned: &PlannedRefresh, + state: &AppState, +) -> Result { + let series = SeriesRepository::get_by_id(db, planned.series_id) + .await + .map_err(|e| anyhow::anyhow!("Failed to load series for re-match: {e:#}"))? 
+ .ok_or_else(|| anyhow::anyhow!("Series {} not found", planned.series_id))?; + let metadata = SeriesMetadataRepository::get_by_series_id(db, planned.series_id) + .await + .map_err(|e| anyhow::anyhow!("Failed to load metadata for re-match: {e:#}"))?; + let title = metadata + .as_ref() + .map(|m| m.title.clone()) + .filter(|t| !t.is_empty()) + .unwrap_or_else(|| series.name.clone()); + let year = metadata.as_ref().and_then(|m| m.year); + + let match_params = MetadataMatchParams { + title, + year, + author: None, + }; + + let candidate = tokio::time::timeout( + DRY_RUN_PER_PAIR_TIMEOUT, + state + .plugin_manager + .match_series(planned.plugin.id, match_params), + ) + .await + .map_err(|_| anyhow::anyhow!("Re-match timed out"))? + .map_err(|e| anyhow::anyhow!("Re-match failed: {e}"))? + .ok_or_else(|| anyhow::anyhow!("No match candidate"))?; + + Ok(candidate.external_id) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn field_group_label_covers_every_variant() { + // Smoke-test that all enum variants are mapped — adding a variant + // without a label would otherwise pass clippy but produce a blank + // label in the API response. 
+ for g in FieldGroup::all() { + let label = field_group_label(*g); + assert!(!label.is_empty(), "field group {:?} has empty label", g); + } + } + + #[test] + fn validate_field_groups_accepts_known() { + let groups = vec![ + "ratings".to_string(), + "status".to_string(), + "counts".to_string(), + ]; + assert!(validate_field_groups(&groups).is_ok()); + } + + #[test] + fn validate_field_groups_rejects_unknown() { + let groups = vec!["ratings".to_string(), "made_up".to_string()]; + let err = validate_field_groups(&groups).unwrap_err(); + match err { + ApiError::BadRequest(msg) => assert!(msg.contains("made_up")), + _ => panic!("expected BadRequest"), + } + } +} diff --git a/src/api/routes/v1/handlers/mod.rs b/src/api/routes/v1/handlers/mod.rs index ff556d70..1c382807 100644 --- a/src/api/routes/v1/handlers/mod.rs +++ b/src/api/routes/v1/handlers/mod.rs @@ -51,6 +51,7 @@ pub mod filesystem; pub mod health; pub mod info; pub mod libraries; +pub mod metadata_refresh; pub mod metrics; pub mod oidc; pub mod pages; diff --git a/src/api/routes/v1/routes/libraries.rs b/src/api/routes/v1/routes/libraries.rs index 7e8157fa..2ec1a07d 100644 --- a/src/api/routes/v1/routes/libraries.rs +++ b/src/api/routes/v1/routes/libraries.rs @@ -101,4 +101,21 @@ pub fn routes(_state: Arc) -> Router> { "/libraries/{library_id}/series/titles/reprocess", post(handlers::task_queue::reprocess_library_series_titles), ) + // Scheduled metadata refresh (Phase 6) + .route( + "/libraries/{library_id}/metadata-refresh", + get(handlers::metadata_refresh::get_refresh_config), + ) + .route( + "/libraries/{library_id}/metadata-refresh", + patch(handlers::metadata_refresh::patch_refresh_config), + ) + .route( + "/libraries/{library_id}/metadata-refresh/run-now", + post(handlers::metadata_refresh::run_refresh_now), + ) + .route( + "/libraries/{library_id}/metadata-refresh/dry-run", + post(handlers::metadata_refresh::dry_run_refresh), + ) } diff --git a/src/api/routes/v1/routes/misc.rs 
b/src/api/routes/v1/routes/misc.rs index db6ce6b6..f992ed04 100644 --- a/src/api/routes/v1/routes/misc.rs +++ b/src/api/routes/v1/routes/misc.rs @@ -80,4 +80,9 @@ pub fn routes(_state: Arc) -> Router> { "/settings/public", get(handlers::settings::get_public_settings), ) + // Static field-group catalog for the scheduled metadata refresh UI + .route( + "/metadata-refresh/field-groups", + get(handlers::metadata_refresh::list_field_groups), + ) } diff --git a/tests/api.rs b/tests/api.rs index a9238fad..1b5642f3 100644 --- a/tests/api.rs +++ b/tests/api.rs @@ -23,6 +23,7 @@ mod api { mod koreader; mod libraries; mod metadata_locks; + mod metadata_refresh; mod metadata_reset; mod metrics; mod oidc; diff --git a/tests/api/metadata_refresh.rs b/tests/api/metadata_refresh.rs new file mode 100644 index 00000000..2eb3e004 --- /dev/null +++ b/tests/api/metadata_refresh.rs @@ -0,0 +1,565 @@ +// Allow unused temp_dir - needed to keep TempDir alive but not always referenced +#![allow(unused_variables)] + +#[path = "../common/mod.rs"] +mod common; + +use codex::api::error::ErrorResponse; +use codex::api::routes::v1::dto::{ + DryRunResponse, FieldGroupDto, MetadataRefreshConfigDto, RunNowResponse, +}; +use codex::db::ScanningStrategy; +use codex::db::entities::plugins::PluginPermission; +use codex::db::repositories::{LibraryRepository, PluginsRepository, UserRepository}; +use codex::services::plugin::protocol::PluginScope; +use codex::utils::password; +use common::*; +use hyper::StatusCode; +use std::env; +use std::sync::Once; +use uuid::Uuid; + +static INIT_ENCRYPTION: Once = Once::new(); + +fn setup_test_encryption_key() { + INIT_ENCRYPTION.call_once(|| { + if env::var("CODEX_ENCRYPTION_KEY").is_err() { + // SAFETY: tests run with shared env access; first writer wins. 
+ unsafe { + env::set_var( + "CODEX_ENCRYPTION_KEY", + "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8=", + ); + } + } + }); +} + +async fn create_admin_and_token( + db: &sea_orm::DatabaseConnection, + state: &codex::api::extractors::AppState, +) -> String { + let password_hash = password::hash_password("admin123").unwrap(); + let user = create_test_user("admin", "admin@example.com", &password_hash, true); + let created = UserRepository::create(db, &user).await.unwrap(); + state + .jwt_service + .generate_token(created.id, created.username.clone(), created.get_role()) + .unwrap() +} + +async fn create_readonly_and_token( + db: &sea_orm::DatabaseConnection, + state: &codex::api::extractors::AppState, +) -> String { + let password_hash = password::hash_password("user123").unwrap(); + let user = create_test_user("readonly", "readonly@example.com", &password_hash, false); + let created = UserRepository::create(db, &user).await.unwrap(); + state + .jwt_service + .generate_token(created.id, created.username.clone(), created.get_role()) + .unwrap() +} + +async fn create_test_plugin(db: &sea_orm::DatabaseConnection, name: &str, enabled: bool) -> Uuid { + setup_test_encryption_key(); + PluginsRepository::create( + db, + name, + name, + None, + "system", + "node", + vec!["dist/index.js".to_string()], + vec![], + None, + vec![PluginPermission::MetadataWriteSummary], + vec![PluginScope::SeriesDetail], + vec![], + None, + "env", + None, + enabled, + None, + None, + ) + .await + .unwrap() + .id +} + +async fn create_library(db: &sea_orm::DatabaseConnection) -> Uuid { + LibraryRepository::create(db, "Test Library", "/tmp/test", ScanningStrategy::Default) + .await + .unwrap() + .id +} + +// ============================================================================ +// GET refresh config +// ============================================================================ + +#[tokio::test] +async fn test_get_refresh_config_returns_defaults_when_unset() { + let (db, _tmp) = 
setup_test_db().await; + let library_id = create_library(&db).await; + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh"); + let req = get_request_with_auth(&uri, &token); + let (status, body): (StatusCode, Option) = + make_json_request(app, req).await; + + assert_eq!(status, StatusCode::OK); + let cfg = body.unwrap(); + assert!(!cfg.enabled); + assert!(cfg.existing_source_ids_only); + assert_eq!(cfg.field_groups, vec!["ratings", "status", "counts"]); + assert!(cfg.providers.is_empty()); + assert_eq!(cfg.cron_schedule, "0 0 4 * * *"); +} + +#[tokio::test] +async fn test_get_refresh_config_returns_persisted_values() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + // Persist a non-default config directly through the repository. + let cfg = codex::services::metadata::MetadataRefreshConfig { + enabled: true, + cron_schedule: "0 30 2 * * *".to_string(), + providers: vec!["plugin:none".to_string()], + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library_id, &cfg) + .await + .unwrap(); + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh"); + let req = get_request_with_auth(&uri, &token); + let (status, body): (StatusCode, Option) = + make_json_request(app, req).await; + + assert_eq!(status, StatusCode::OK); + let dto = body.unwrap(); + assert!(dto.enabled); + assert_eq!(dto.cron_schedule, "0 30 2 * * *"); + assert_eq!(dto.providers, vec!["plugin:none"]); +} + +#[tokio::test] +async fn test_get_refresh_config_unknown_library_404() { + let (db, _tmp) = setup_test_db().await; + let state = 
create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let bogus = Uuid::new_v4(); + let uri = format!("/api/v1/libraries/{bogus}/metadata-refresh"); + let req = get_request_with_auth(&uri, &token); + let (status, _): (StatusCode, Option) = make_json_request(app, req).await; + assert_eq!(status, StatusCode::NOT_FOUND); +} + +#[tokio::test] +async fn test_get_refresh_config_requires_auth() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + let state = create_test_app_state(db.clone()).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh"); + let req = get_request(&uri); + let (status, _): (StatusCode, Option) = make_json_request(app, req).await; + assert_eq!(status, StatusCode::UNAUTHORIZED); +} + +// ============================================================================ +// PATCH refresh config +// ============================================================================ + +#[tokio::test] +async fn test_patch_refresh_config_round_trips() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + let plugin_id = create_test_plugin(&db, "mangabaka", true).await; + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh"); + let body = r#"{ + "enabled": true, + "cronSchedule": "0 0 5 * * *", + "fieldGroups": ["ratings", "counts"], + "providers": ["plugin:mangabaka"] + }"#; + let req = patch_request_with_auth_json(&uri, &token, body); + let (status, response): (StatusCode, Option) = + make_json_request(app, req).await; + + assert_eq!(status, StatusCode::OK); + let cfg = response.unwrap(); + assert!(cfg.enabled); + 
assert_eq!(cfg.cron_schedule, "0 0 5 * * *"); + assert_eq!(cfg.field_groups, vec!["ratings", "counts"]); + assert_eq!(cfg.providers, vec!["plugin:mangabaka"]); + + // Verify persistence: read back via repo. + let stored = LibraryRepository::get_metadata_refresh_config(&db, library_id) + .await + .unwrap(); + assert!(stored.enabled); + assert_eq!(stored.cron_schedule, "0 0 5 * * *"); +} + +#[tokio::test] +async fn test_patch_refresh_config_partial_preserves_other_fields() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + // Seed an existing config with field_groups set. + let cfg = codex::services::metadata::MetadataRefreshConfig { + field_groups: vec!["status".to_string(), "counts".to_string()], + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library_id, &cfg) + .await + .unwrap(); + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh"); + let body = r#"{"enabled": true}"#; + let req = patch_request_with_auth_json(&uri, &token, body); + let (status, response): (StatusCode, Option) = + make_json_request(app, req).await; + + assert_eq!(status, StatusCode::OK); + let dto = response.unwrap(); + assert!(dto.enabled); + // field_groups untouched. 
+ assert_eq!(dto.field_groups, vec!["status", "counts"]); +} + +#[tokio::test] +async fn test_patch_refresh_config_rejects_invalid_cron() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh"); + let body = r#"{"cronSchedule": "not a cron"}"#; + let req = patch_request_with_auth_json(&uri, &token, body); + let (status, err): (StatusCode, Option) = make_json_request(app, req).await; + assert_eq!(status, StatusCode::BAD_REQUEST); + let err = err.unwrap(); + assert!(err.message.to_lowercase().contains("cron")); +} + +#[tokio::test] +async fn test_patch_refresh_config_rejects_unknown_field_group() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh"); + let body = r#"{"fieldGroups": ["ratings", "made_up_group"]}"#; + let req = patch_request_with_auth_json(&uri, &token, body); + let (status, err): (StatusCode, Option) = make_json_request(app, req).await; + assert_eq!(status, StatusCode::BAD_REQUEST); + assert!(err.unwrap().message.contains("made_up_group")); +} + +#[tokio::test] +async fn test_patch_refresh_config_rejects_unknown_provider() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh"); + let body = r#"{"providers": ["plugin:nonexistent"]}"#; + let 
req = patch_request_with_auth_json(&uri, &token, body); + let (status, err): (StatusCode, Option) = make_json_request(app, req).await; + assert_eq!(status, StatusCode::BAD_REQUEST); + assert!(err.unwrap().message.contains("nonexistent")); +} + +#[tokio::test] +async fn test_patch_refresh_config_rejects_invalid_timezone() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh"); + let body = r#"{"timezone": "Not/Valid"}"#; + let req = patch_request_with_auth_json(&uri, &token, body); + let (status, _): (StatusCode, Option) = make_json_request(app, req).await; + assert_eq!(status, StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn test_patch_refresh_config_clears_timezone_on_null() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + let cfg = codex::services::metadata::MetadataRefreshConfig { + timezone: Some("Europe/Paris".to_string()), + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library_id, &cfg) + .await + .unwrap(); + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh"); + let body = r#"{"timezone": null}"#; + let req = patch_request_with_auth_json(&uri, &token, body); + let (status, response): (StatusCode, Option) = + make_json_request(app, req).await; + + assert_eq!(status, StatusCode::OK); + assert!(response.unwrap().timezone.is_none()); +} + +#[tokio::test] +async fn test_patch_refresh_config_rejects_zero_max_concurrency() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + let state 
= create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh"); + let body = r#"{"maxConcurrency": 0}"#; + let req = patch_request_with_auth_json(&uri, &token, body); + let (status, _): (StatusCode, Option) = make_json_request(app, req).await; + assert_eq!(status, StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn test_patch_refresh_config_requires_write_permission() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + let state = create_test_app_state(db.clone()).await; + let token = create_readonly_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh"); + let body = r#"{"enabled": true}"#; + let req = patch_request_with_auth_json(&uri, &token, body); + let (status, _): (StatusCode, Option) = make_json_request(app, req).await; + assert_eq!(status, StatusCode::FORBIDDEN); +} + +// ============================================================================ +// run-now +// ============================================================================ + +#[tokio::test] +async fn test_run_now_enqueues_task() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh/run-now"); + let req = post_request_with_auth(&uri, &token); + let (status, response): (StatusCode, Option) = + make_json_request(app, req).await; + assert_eq!(status, StatusCode::OK); + let task_id = response.unwrap().task_id; + assert!(!task_id.is_nil()); + + // Verify a task was created in the DB. 
+ use codex::db::entities::{prelude::Tasks, tasks}; + use sea_orm::{ColumnTrait, EntityTrait, QueryFilter}; + let stored = Tasks::find() + .filter(tasks::Column::TaskType.eq("refresh_library_metadata")) + .filter(tasks::Column::LibraryId.eq(library_id)) + .one(&db) + .await + .unwrap(); + assert!(stored.is_some(), "task should have been enqueued"); +} + +#[tokio::test] +async fn test_run_now_conflicts_when_active_task_exists() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + // Pre-enqueue a task to simulate an in-flight refresh. + use codex::db::repositories::TaskRepository; + use codex::tasks::types::TaskType; + TaskRepository::enqueue(&db, TaskType::RefreshLibraryMetadata { library_id }, None) + .await + .unwrap(); + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh/run-now"); + let req = post_request_with_auth(&uri, &token); + let (status, _): (StatusCode, Option) = make_json_request(app, req).await; + assert_eq!(status, StatusCode::CONFLICT); +} + +#[tokio::test] +async fn test_run_now_unknown_library_404() { + let (db, _tmp) = setup_test_db().await; + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let bogus = Uuid::new_v4(); + let uri = format!("/api/v1/libraries/{bogus}/metadata-refresh/run-now"); + let req = post_request_with_auth(&uri, &token); + let (status, _): (StatusCode, Option) = make_json_request(app, req).await; + assert_eq!(status, StatusCode::NOT_FOUND); +} + +// ============================================================================ +// dry-run +// ============================================================================ + +#[tokio::test] +async fn 
test_dry_run_with_no_providers_returns_empty_sample() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh/dry-run"); + let req = post_request_with_auth_json(&uri, &token, "{}"); + let (status, response): (StatusCode, Option) = + make_json_request(app, req).await; + assert_eq!(status, StatusCode::OK); + let dto = response.unwrap(); + assert!(dto.sample.is_empty()); + assert_eq!(dto.total_eligible, 0); + assert!(dto.unresolved_providers.is_empty()); +} + +#[tokio::test] +async fn test_dry_run_reports_unresolved_providers() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + let cfg = codex::services::metadata::MetadataRefreshConfig { + providers: vec!["plugin:not_installed".to_string()], + ..Default::default() + }; + LibraryRepository::set_metadata_refresh_config(&db, library_id, &cfg) + .await + .unwrap(); + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh/dry-run"); + let req = post_request_with_auth_json(&uri, &token, "{}"); + let (status, response): (StatusCode, Option) = + make_json_request(app, req).await; + assert_eq!(status, StatusCode::OK); + let dto = response.unwrap(); + assert_eq!(dto.unresolved_providers, vec!["plugin:not_installed"]); + assert!(dto.sample.is_empty()); +} + +#[tokio::test] +async fn test_dry_run_validates_config_override() { + let (db, _tmp) = setup_test_db().await; + let library_id = create_library(&db).await; + + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = 
create_test_router_with_app_state(state); + + let uri = format!("/api/v1/libraries/{library_id}/metadata-refresh/dry-run"); + // Bad cron in the override. + let body = r#"{"configOverride": { + "enabled": false, + "cronSchedule": "garbage", + "fieldGroups": ["ratings"], + "extraFields": [], + "providers": [], + "existingSourceIdsOnly": true, + "skipRecentlySyncedWithinS": 0, + "maxConcurrency": 4 + }}"#; + let req = post_request_with_auth_json(&uri, &token, body); + let (status, _): (StatusCode, Option) = make_json_request(app, req).await; + assert_eq!(status, StatusCode::BAD_REQUEST); +} + +// ============================================================================ +// field-groups catalog +// ============================================================================ + +#[tokio::test] +async fn test_list_field_groups_returns_full_catalog() { + let (db, _tmp) = setup_test_db().await; + let state = create_test_app_state(db.clone()).await; + let token = create_admin_and_token(&db, &state).await; + let app = create_test_router_with_app_state(state); + + let req = get_request_with_auth("/api/v1/metadata-refresh/field-groups", &token); + let (status, response): (StatusCode, Option>) = + make_json_request(app, req).await; + assert_eq!(status, StatusCode::OK); + let groups = response.unwrap(); + // 12 groups in FieldGroup::all(). 
+ assert_eq!(groups.len(), 12); + + let ratings = groups.iter().find(|g| g.id == "ratings").unwrap(); + assert!(ratings.fields.contains(&"rating".to_string())); + assert!(ratings.fields.contains(&"externalRatings".to_string())); + assert_eq!(ratings.label, "Ratings"); + + let counts = groups.iter().find(|g| g.id == "counts").unwrap(); + assert!(counts.fields.contains(&"totalVolumeCount".to_string())); + assert!(counts.fields.contains(&"totalChapterCount".to_string())); +} + +#[tokio::test] +async fn test_list_field_groups_requires_auth() { + let (db, _tmp) = setup_test_db().await; + let state = create_test_app_state(db.clone()).await; + let app = create_test_router_with_app_state(state); + + let req = get_request("/api/v1/metadata-refresh/field-groups"); + let (status, _): (StatusCode, Option) = make_json_request(app, req).await; + assert_eq!(status, StatusCode::UNAUTHORIZED); +} diff --git a/web/openapi.json b/web/openapi.json index 38c2959b..3ccfb9aa 100644 --- a/web/openapi.json +++ b/web/openapi.json @@ -21707,6 +21707,131 @@ } } }, + "DryRunRequest": { + "type": "object", + "description": "Body for `POST /libraries/{id}/metadata-refresh/dry-run`.\n\n`configOverride` lets the UI preview a config that hasn't been saved yet —\n\"what would happen if I clicked Save right now?\". When absent, the saved\nconfig is used.", + "properties": { + "configOverride": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/MetadataRefreshConfigDto", + "description": "Optional unsaved config to preview. When absent, the library's saved\nconfig is used." + } + ], + "default": null + }, + "sampleSize": { + "type": [ + "integer", + "null" + ], + "format": "int32", + "description": "Number of series to preview. 
Defaults to 5, capped at 20.", + "default": null, + "minimum": 0 + } + } + }, + "DryRunResponse": { + "type": "object", + "description": "Full dry-run response.", + "required": [ + "sample", + "totalEligible", + "estSkippedNoId", + "estSkippedRecentlySynced" + ], + "properties": { + "estSkippedNoId": { + "type": "integer", + "format": "int32", + "description": "Estimated `(series, provider)` pairs the planner skipped because the\nseries has no stored external ID for the provider (strict mode only).", + "minimum": 0 + }, + "estSkippedRecentlySynced": { + "type": "integer", + "format": "int32", + "description": "Estimated pairs skipped because their `last_synced_at` is younger than\nthe recency cutoff.", + "minimum": 0 + }, + "sample": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DryRunSeriesDelta" + }, + "description": "Per-series deltas, capped at the requested sample size." + }, + "totalEligible": { + "type": "integer", + "format": "int32", + "description": "Total number of `(series, provider)` pairs the planner produced before\nthe sample cap.", + "minimum": 0 + }, + "unresolvedProviders": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Provider strings from the config that don't resolve to an enabled\nplugin. Surfaced verbatim so the UI can highlight typos or disabled\nplugins." + } + } + }, + "DryRunSeriesDelta": { + "type": "object", + "description": "One series' would-be deltas in a dry-run preview.", + "required": [ + "seriesId", + "seriesTitle", + "provider", + "changes" + ], + "properties": { + "changes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FieldChangeDto" + }, + "description": "Fields that would be written." + }, + "provider": { + "type": "string", + "description": "Plugin id (`\"plugin:\"`) that produced this delta." 
+ }, + "seriesId": { + "type": "string", + "format": "uuid" + }, + "seriesTitle": { + "type": "string" + }, + "skipped": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DryRunSkippedFieldDto" + }, + "description": "Fields that would be skipped (locked, no permission, etc.)." + } + } + }, + "DryRunSkippedFieldDto": { + "type": "object", + "description": "A field skipped during a dry-run apply, with the reason.", + "required": [ + "field", + "reason" + ], + "properties": { + "field": { + "type": "string" + }, + "reason": { + "type": "string" + } + } + }, "DuplicateGroup": { "type": "object", "description": "A group of duplicate books", @@ -22866,6 +22991,32 @@ } } }, + "FieldGroupDto": { + "type": "object", + "description": "One entry from `GET /api/v1/metadata-refresh/field-groups`.", + "required": [ + "id", + "label", + "fields" + ], + "properties": { + "fields": { + "type": "array", + "items": { + "type": "string" + }, + "description": "camelCase field names this group expands into. Match the\n`should_apply_field` call sites in `MetadataApplier`." + }, + "id": { + "type": "string", + "description": "Snake_case identifier stored in [`MetadataRefreshConfig::field_groups`]." + }, + "label": { + "type": "string", + "description": "Human-readable label for UI display." 
+ } + } + }, "FieldOperator": { "oneOf": [ { @@ -26710,6 +26861,165 @@ } } }, + "MetadataRefreshConfigDto": { + "type": "object", + "description": "Full read response for a library's scheduled metadata-refresh config.\n\nWhen the library has no stored config, the server returns\n[`MetadataRefreshConfig::default`] so clients always render something.", + "required": [ + "enabled", + "cronSchedule", + "fieldGroups", + "extraFields", + "providers", + "existingSourceIdsOnly", + "skipRecentlySyncedWithinS", + "maxConcurrency" + ], + "properties": { + "cronSchedule": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "existingSourceIdsOnly": { + "type": "boolean" + }, + "extraFields": { + "type": "array", + "items": { + "type": "string" + } + }, + "fieldGroups": { + "type": "array", + "items": { + "type": "string" + } + }, + "maxConcurrency": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "perProviderOverrides": { + "type": [ + "object", + "null" + ], + "additionalProperties": { + "$ref": "#/components/schemas/ProviderOverrideDto" + }, + "propertyNames": { + "type": "string" + } + }, + "providers": { + "type": "array", + "items": { + "type": "string" + } + }, + "skipRecentlySyncedWithinS": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "timezone": { + "type": [ + "string", + "null" + ] + } + } + }, + "MetadataRefreshConfigPatchDto": { + "type": "object", + "description": "Partial PATCH body. Uses [`PatchValue`] for nullable fields so clients can\ndistinguish \"leave alone\" from \"explicit clear\".\n\nAll other fields use plain `Option` because clearing a non-nullable\nfield doesn't make sense (e.g. 
you can't \"unset\" `enabled` — you can only\nflip it).", + "properties": { + "cronSchedule": { + "type": [ + "string", + "null" + ], + "default": null + }, + "enabled": { + "type": [ + "boolean", + "null" + ], + "default": null + }, + "existingSourceIdsOnly": { + "type": [ + "boolean", + "null" + ], + "default": null + }, + "extraFields": { + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + }, + "default": null + }, + "fieldGroups": { + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + }, + "default": null + }, + "maxConcurrency": { + "type": [ + "integer", + "null" + ], + "format": "int32", + "default": null, + "minimum": 0 + }, + "perProviderOverrides": { + "type": [ + "object", + "null" + ] + }, + "providers": { + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + }, + "default": null + }, + "skipRecentlySyncedWithinS": { + "type": [ + "integer", + "null" + ], + "format": "int32", + "default": null, + "minimum": 0 + }, + "timezone": { + "type": [ + "string", + "null" + ], + "default": null + } + } + }, "MetricsCleanupResponse": { "type": "object", "description": "Response for cleanup operation", @@ -30384,6 +30694,24 @@ } } }, + "ProviderOverrideDto": { + "type": "object", + "description": "Per-provider override placeholder (Phase 8). 
Mirrors\n[`crate::services::metadata::ProviderOverride`] for the wire format.", + "properties": { + "extraFields": { + "type": "array", + "items": { + "type": "string" + } + }, + "fieldGroups": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, "PublicSettingDto": { "type": "object", "description": "Public setting DTO (for non-admin users)\n\nA simplified setting DTO that only includes the key and value,\nused for public display settings accessible to all authenticated users.", @@ -31764,6 +32092,20 @@ } } }, + "RunNowResponse": { + "type": "object", + "description": "Response for `POST /libraries/{id}/metadata-refresh/run-now`.", + "required": [ + "taskId" + ], + "properties": { + "taskId": { + "type": "string", + "format": "uuid", + "description": "Background task ID. Subscribe to events on `/api/v1/events/stream` to\nfollow progress." + } + } + }, "ScanStatusDto": { "type": "object", "description": "Scan status response", @@ -36967,6 +37309,10 @@ "name": "Plugin Actions", "description": "Plugin action discovery and execution for metadata fetching" }, + { + "name": "Metadata Refresh", + "description": "Per-library scheduled metadata refresh configuration, dry-run preview, and run-now" + }, { "name": "User Plugins", "description": "User-facing plugin management, OAuth, and configuration" diff --git a/web/src/types/api.generated.ts b/web/src/types/api.generated.ts index 5944940c..53e5c17a 100644 --- a/web/src/types/api.generated.ts +++ b/web/src/types/api.generated.ts @@ -8959,6 +8959,69 @@ export interface components { DryRunReportDto: { changes: components["schemas"]["FieldChangeDto"][]; }; + /** + * @description Body for `POST /libraries/{id}/metadata-refresh/dry-run`. + * + * `configOverride` lets the UI preview a config that hasn't been saved yet — + * "what would happen if I clicked Save right now?". When absent, the saved + * config is used. 
+ */ + DryRunRequest: { + /** @default null */ + configOverride: null | components["schemas"]["MetadataRefreshConfigDto"]; + /** + * Format: int32 + * @description Number of series to preview. Defaults to 5, capped at 20. + * @default null + */ + sampleSize: number | null; + }; + /** @description Full dry-run response. */ + DryRunResponse: { + /** + * Format: int32 + * @description Estimated `(series, provider)` pairs the planner skipped because the + * series has no stored external ID for the provider (strict mode only). + */ + estSkippedNoId: number; + /** + * Format: int32 + * @description Estimated pairs skipped because their `last_synced_at` is younger than + * the recency cutoff. + */ + estSkippedRecentlySynced: number; + /** @description Per-series deltas, capped at the requested sample size. */ + sample: components["schemas"]["DryRunSeriesDelta"][]; + /** + * Format: int32 + * @description Total number of `(series, provider)` pairs the planner produced before + * the sample cap. + */ + totalEligible: number; + /** + * @description Provider strings from the config that don't resolve to an enabled + * plugin. Surfaced verbatim so the UI can highlight typos or disabled + * plugins. + */ + unresolvedProviders?: string[]; + }; + /** @description One series' would-be deltas in a dry-run preview. */ + DryRunSeriesDelta: { + /** @description Fields that would be written. */ + changes: components["schemas"]["FieldChangeDto"][]; + /** @description Plugin id (`"plugin:"`) that produced this delta. */ + provider: string; + /** Format: uuid */ + seriesId: string; + seriesTitle: string; + /** @description Fields that would be skipped (locked, no permission, etc.). */ + skipped?: components["schemas"]["DryRunSkippedFieldDto"][]; + }; + /** @description A field skipped during a dry-run apply, with the reason. 
*/ + DryRunSkippedFieldDto: { + field: string; + reason: string; + }; /** @description A group of duplicate books */ DuplicateGroup: { /** @description List of book IDs that share this hash */ @@ -9474,6 +9537,18 @@ export interface components { before?: unknown; field: string; }; + /** @description One entry from `GET /api/v1/metadata-refresh/field-groups`. */ + FieldGroupDto: { + /** + * @description camelCase field names this group expands into. Match the + * `should_apply_field` call sites in `MetadataApplier`. + */ + fields: string[]; + /** @description Snake_case identifier stored in [`MetadataRefreshConfig::field_groups`]. */ + id: string; + /** @description Human-readable label for UI display. */ + label: string; + }; /** @description Operators for string and equality comparisons */ FieldOperator: { /** @enum {string} */ @@ -11444,6 +11519,63 @@ export interface components { /** @description Summary counts */ summary: components["schemas"]["PreviewSummary"]; }; + /** + * @description Full read response for a library's scheduled metadata-refresh config. + * + * When the library has no stored config, the server returns + * [`MetadataRefreshConfig::default`] so clients always render something. + */ + MetadataRefreshConfigDto: { + cronSchedule: string; + enabled: boolean; + existingSourceIdsOnly: boolean; + extraFields: string[]; + fieldGroups: string[]; + /** Format: int32 */ + maxConcurrency: number; + perProviderOverrides?: { + [key: string]: components["schemas"]["ProviderOverrideDto"]; + } | null; + providers: string[]; + /** Format: int32 */ + skipRecentlySyncedWithinS: number; + timezone?: string | null; + }; + /** + * @description Partial PATCH body. Uses [`PatchValue`] for nullable fields so clients can + * distinguish "leave alone" from "explicit clear". + * + * All other fields use plain `Option` because clearing a non-nullable + * field doesn't make sense (e.g. you can't "unset" `enabled` — you can only + * flip it). 
+ */ + MetadataRefreshConfigPatchDto: { + /** @default null */ + cronSchedule: string | null; + /** @default null */ + enabled: boolean | null; + /** @default null */ + existingSourceIdsOnly: boolean | null; + /** @default null */ + extraFields: string[] | null; + /** @default null */ + fieldGroups: string[] | null; + /** + * Format: int32 + * @default null + */ + maxConcurrency: number | null; + perProviderOverrides?: Record | null; + /** @default null */ + providers: string[] | null; + /** + * Format: int32 + * @default null + */ + skipRecentlySyncedWithinS: number | null; + /** @default null */ + timezone: string | null; + }; /** @description Response for cleanup operation */ MetricsCleanupResponse: { /** @@ -13611,6 +13743,14 @@ export interface components { /** @description Number of fields that will be applied */ willApply: number; }; + /** + * @description Per-provider override placeholder (Phase 8). Mirrors + * [`crate::services::metadata::ProviderOverride`] for the wire format. + */ + ProviderOverrideDto: { + extraFields?: string[]; + fieldGroups?: string[]; + }; /** * @description Public setting DTO (for non-admin users) * @@ -14399,6 +14539,15 @@ export interface components { */ tasksEnqueued: number; }; + /** @description Response for `POST /libraries/{id}/metadata-refresh/run-now`. */ + RunNowResponse: { + /** + * Format: uuid + * @description Background task ID. Subscribe to events on `/api/v1/events/stream` to + * follow progress. + */ + taskId: string; + }; /** @description Scan status response */ ScanStatusDto: { /** From b4e351f3df3c8e243e4d580158e970c1bf3f7b0d Mon Sep 17 00:00:00 2001 From: Sylvain Cau Date: Sun, 3 May 2026 13:50:56 -0700 Subject: [PATCH 07/12] feat(web): add per-library metadata refresh settings UI Surface the scheduled metadata-refresh feature in the library settings flow. 
A new "Metadata Refresh" tab in the edit-library modal lets admins configure the schedule (enable + cron preset or custom expression + timezone), choose which field groups to refresh, pick metadata providers, and tune safety options (existing-source-IDs-only, recency cutoff, max concurrency). A "Preview changes" action drives a synchronous dry-run and renders the resulting deltas as `field: before -> after` rows, with locked or otherwise-skipped fields shown alongside their reason. Unresolved providers are highlighted so typos and disabled plugins are easy to spot. "Run now" enqueues the refresh task and reuses the existing task-progress SSE channel to render an inline progress alert; the button stays disabled while a refresh is in flight to avoid piling work onto an active run. Includes a typed API client, TanStack Query hooks (config get/update, run-now, dry-run, field-groups), and component/hook tests covering hydration, save, run-now, dry-run rendering, and edge states. --- web/src/api/metadataRefresh.test.ts | 164 ++++++++ web/src/api/metadataRefresh.ts | 87 ++++ web/src/components/forms/LibraryModal.tsx | 15 + .../MetadataRefreshDryRunResult.test.tsx | 88 ++++ .../library/MetadataRefreshDryRunResult.tsx | 209 ++++++++++ .../library/MetadataRefreshSettings.test.tsx | 185 +++++++++ .../library/MetadataRefreshSettings.tsx | 379 ++++++++++++++++++ .../hooks/useLibraryMetadataRefresh.test.tsx | 182 +++++++++ web/src/hooks/useLibraryMetadataRefresh.ts | 114 ++++++ 9 files changed, 1423 insertions(+) create mode 100644 web/src/api/metadataRefresh.test.ts create mode 100644 web/src/api/metadataRefresh.ts create mode 100644 web/src/components/library/MetadataRefreshDryRunResult.test.tsx create mode 100644 web/src/components/library/MetadataRefreshDryRunResult.tsx create mode 100644 web/src/components/library/MetadataRefreshSettings.test.tsx create mode 100644 web/src/components/library/MetadataRefreshSettings.tsx create mode 100644 
web/src/hooks/useLibraryMetadataRefresh.test.tsx create mode 100644 web/src/hooks/useLibraryMetadataRefresh.ts diff --git a/web/src/api/metadataRefresh.test.ts b/web/src/api/metadataRefresh.test.ts new file mode 100644 index 00000000..164289af --- /dev/null +++ b/web/src/api/metadataRefresh.test.ts @@ -0,0 +1,164 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { api } from "./client"; +import { metadataRefreshApi } from "./metadataRefresh"; + +vi.mock("./client", () => ({ + api: { + get: vi.fn(), + post: vi.fn(), + patch: vi.fn(), + }, +})); + +const LIBRARY_ID = "11111111-1111-1111-1111-111111111111"; + +describe("metadataRefreshApi", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + describe("get", () => { + it("fetches the saved config for a library", async () => { + const config = { + enabled: true, + cronSchedule: "0 4 * * *", + timezone: null, + fieldGroups: ["ratings", "status", "counts"], + extraFields: [], + providers: ["plugin:mangabaka"], + existingSourceIdsOnly: true, + skipRecentlySyncedWithinS: 3600, + maxConcurrency: 4, + }; + vi.mocked(api.get).mockResolvedValueOnce({ data: config }); + + const result = await metadataRefreshApi.get(LIBRARY_ID); + + expect(api.get).toHaveBeenCalledWith( + `/libraries/${LIBRARY_ID}/metadata-refresh`, + ); + expect(result).toEqual(config); + }); + }); + + describe("update", () => { + it("PATCHes only the provided fields", async () => { + const updated = { + enabled: true, + cronSchedule: "0 0 * * *", + timezone: null, + fieldGroups: ["ratings"], + extraFields: [], + providers: ["plugin:mangabaka"], + existingSourceIdsOnly: true, + skipRecentlySyncedWithinS: 3600, + maxConcurrency: 4, + }; + vi.mocked(api.patch).mockResolvedValueOnce({ data: updated }); + + const result = await metadataRefreshApi.update(LIBRARY_ID, { + enabled: true, + fieldGroups: ["ratings"], + }); + + expect(api.patch).toHaveBeenCalledWith( + 
`/libraries/${LIBRARY_ID}/metadata-refresh`, + { enabled: true, fieldGroups: ["ratings"] }, + ); + expect(result).toEqual(updated); + }); + }); + + describe("runNow", () => { + it("POSTs run-now and returns the task id", async () => { + const taskId = "22222222-2222-2222-2222-222222222222"; + vi.mocked(api.post).mockResolvedValueOnce({ data: { taskId } }); + + const result = await metadataRefreshApi.runNow(LIBRARY_ID); + + expect(api.post).toHaveBeenCalledWith( + `/libraries/${LIBRARY_ID}/metadata-refresh/run-now`, + ); + expect(result).toEqual({ taskId }); + }); + }); + + describe("dryRun", () => { + it("posts default body when no override is supplied", async () => { + const dryRun = { + sample: [], + totalEligible: 0, + estSkippedNoId: 0, + estSkippedRecentlySynced: 0, + }; + vi.mocked(api.post).mockResolvedValueOnce({ data: dryRun }); + + const result = await metadataRefreshApi.dryRun(LIBRARY_ID); + + expect(api.post).toHaveBeenCalledWith( + `/libraries/${LIBRARY_ID}/metadata-refresh/dry-run`, + { configOverride: null, sampleSize: null }, + ); + expect(result).toEqual(dryRun); + }); + + it("forwards configOverride and sampleSize when provided", async () => { + const config = { + enabled: true, + cronSchedule: "0 4 * * *", + timezone: null, + fieldGroups: ["ratings"], + extraFields: [], + providers: ["plugin:mangabaka"], + existingSourceIdsOnly: true, + skipRecentlySyncedWithinS: 3600, + maxConcurrency: 4, + }; + const dryRun = { + sample: [ + { + seriesId: "33333333-3333-3333-3333-333333333333", + seriesTitle: "Test", + provider: "plugin:mangabaka", + changes: [{ field: "rating", before: 80, after: 82 }], + skipped: [], + }, + ], + totalEligible: 12, + estSkippedNoId: 0, + estSkippedRecentlySynced: 0, + }; + vi.mocked(api.post).mockResolvedValueOnce({ data: dryRun }); + + const result = await metadataRefreshApi.dryRun(LIBRARY_ID, { + configOverride: config, + sampleSize: 5, + }); + + expect(api.post).toHaveBeenCalledWith( + 
`/libraries/${LIBRARY_ID}/metadata-refresh/dry-run`, + { configOverride: config, sampleSize: 5 }, + ); + expect(result).toEqual(dryRun); + }); + }); + + describe("listFieldGroups", () => { + it("returns the field group catalog", async () => { + const groups = [ + { id: "ratings", label: "Ratings", fields: ["rating"] }, + { id: "status", label: "Status", fields: ["status"] }, + ]; + vi.mocked(api.get).mockResolvedValueOnce({ data: groups }); + + const result = await metadataRefreshApi.listFieldGroups(); + + expect(api.get).toHaveBeenCalledWith("/metadata-refresh/field-groups"); + expect(result).toEqual(groups); + }); + }); +}); diff --git a/web/src/api/metadataRefresh.ts b/web/src/api/metadataRefresh.ts new file mode 100644 index 00000000..11aa3c33 --- /dev/null +++ b/web/src/api/metadataRefresh.ts @@ -0,0 +1,87 @@ +import type { components } from "@/types/api.generated"; +import { api } from "./client"; + +export type MetadataRefreshConfig = + components["schemas"]["MetadataRefreshConfigDto"]; +export type MetadataRefreshConfigPatch = + components["schemas"]["MetadataRefreshConfigPatchDto"]; +export type ProviderOverride = components["schemas"]["ProviderOverrideDto"]; +export type FieldGroup = components["schemas"]["FieldGroupDto"]; +export type RunNowResponse = components["schemas"]["RunNowResponse"]; +export type DryRunRequest = components["schemas"]["DryRunRequest"]; +export type DryRunResponse = components["schemas"]["DryRunResponse"]; +export type DryRunSeriesDelta = components["schemas"]["DryRunSeriesDelta"]; +export type DryRunSkippedField = components["schemas"]["DryRunSkippedFieldDto"]; +export type FieldChange = components["schemas"]["FieldChangeDto"]; + +/** + * Hand-written PATCH input. The generated PATCH DTO uses `T | null` everywhere + * with `default: null`, but in practice clients should omit fields they don't + * want to change. This narrower type lets callers just pass the fields they + * care about. 
+ */ +export type MetadataRefreshConfigPatchInput = Partial<{ + enabled: boolean; + cronSchedule: string; + timezone: string | null; + fieldGroups: string[]; + extraFields: string[]; + providers: string[]; + existingSourceIdsOnly: boolean; + skipRecentlySyncedWithinS: number; + maxConcurrency: number; + perProviderOverrides: Record | null; +}>; + +export const metadataRefreshApi = { + /** Read the current saved config (server returns defaults if none stored). */ + get: async (libraryId: string): Promise => { + const response = await api.get( + `/libraries/${libraryId}/metadata-refresh`, + ); + return response.data; + }, + + /** Partial update; omitted fields are left alone server-side. */ + update: async ( + libraryId: string, + patch: MetadataRefreshConfigPatchInput, + ): Promise => { + const response = await api.patch( + `/libraries/${libraryId}/metadata-refresh`, + patch, + ); + return response.data; + }, + + /** Enqueue an immediate refresh task. */ + runNow: async (libraryId: string): Promise => { + const response = await api.post( + `/libraries/${libraryId}/metadata-refresh/run-now`, + ); + return response.data; + }, + + /** + * Preview what a refresh would change. When `configOverride` is set, the + * preview uses that unsaved config instead of the persisted one. + */ + dryRun: async ( + libraryId: string, + request: DryRunRequest = { configOverride: null, sampleSize: null }, + ): Promise => { + const response = await api.post( + `/libraries/${libraryId}/metadata-refresh/dry-run`, + request, + ); + return response.data; + }, + + /** Catalog of field groups exposed by the server. 
*/ + listFieldGroups: async (): Promise => { + const response = await api.get( + "/metadata-refresh/field-groups", + ); + return response.data; + }, +}; diff --git a/web/src/components/forms/LibraryModal.tsx b/web/src/components/forms/LibraryModal.tsx index 2718dc0f..7adb7092 100644 --- a/web/src/components/forms/LibraryModal.tsx +++ b/web/src/components/forms/LibraryModal.tsx @@ -50,6 +50,7 @@ import type { ScanningConfig, SeriesStrategy, } from "@/types"; +import { MetadataRefreshSettings } from "../library/MetadataRefreshSettings"; import { type AutoMatchConditions, ConditionsEditor } from "./ConditionsEditor"; import { CronInput } from "./CronInput"; import { @@ -930,6 +931,14 @@ export function LibraryModal({ opened, onClose, library }: LibraryModalProps) { > Conditions + {isEditMode && ( + } + > + Metadata Refresh + + )} @@ -955,6 +964,12 @@ export function LibraryModal({ opened, onClose, library }: LibraryModalProps) { {renderConditionsTab()} + + {isEditMode && library && ( + + + + )} diff --git a/web/src/components/library/MetadataRefreshDryRunResult.test.tsx b/web/src/components/library/MetadataRefreshDryRunResult.test.tsx new file mode 100644 index 00000000..7a31d1fb --- /dev/null +++ b/web/src/components/library/MetadataRefreshDryRunResult.test.tsx @@ -0,0 +1,88 @@ +import { describe, expect, it, vi } from "vitest"; +import type { DryRunResponse } from "@/api/metadataRefresh"; +import { renderWithProviders, screen } from "@/test/utils"; +import { MetadataRefreshDryRunResult } from "./MetadataRefreshDryRunResult"; + +function makeResponse(overrides: Partial = {}): DryRunResponse { + return { + sample: [], + totalEligible: 0, + estSkippedNoId: 0, + estSkippedRecentlySynced: 0, + ...overrides, + }; +} + +describe("MetadataRefreshDryRunResult", () => { + it("renders loading state", () => { + renderWithProviders( + , + ); + expect(screen.getByText(/Computing preview/i)).toBeInTheDocument(); + }); + + it("renders empty sample message when nothing eligible", () 
=> { + renderWithProviders( + , + ); + expect( + screen.getByText(/No series eligible for refresh/i), + ).toBeInTheDocument(); + }); + + it("renders changes and skipped fields", () => { + const result = makeResponse({ + totalEligible: 12, + estSkippedNoId: 3, + estSkippedRecentlySynced: 1, + sample: [ + { + seriesId: "11111111-1111-1111-1111-111111111111", + seriesTitle: "Test Series", + provider: "plugin:mangabaka", + changes: [ + { field: "rating", before: 80, after: 82 }, + { field: "status", before: "ongoing", after: "completed" }, + ], + skipped: [{ field: "summary", reason: "field locked" }], + }, + ], + }); + renderWithProviders( + , + ); + + expect(screen.getByText("Test Series")).toBeInTheDocument(); + expect(screen.getByText("plugin:mangabaka")).toBeInTheDocument(); + expect(screen.getByText("rating")).toBeInTheDocument(); + expect(screen.getByText("80")).toBeInTheDocument(); + expect(screen.getByText("82")).toBeInTheDocument(); + expect(screen.getByText("status")).toBeInTheDocument(); + expect(screen.getByText("summary")).toBeInTheDocument(); + expect(screen.getByText(/field locked/i)).toBeInTheDocument(); + expect(screen.getByText("12")).toBeInTheDocument(); + expect(screen.getByText("3")).toBeInTheDocument(); + expect(screen.getByText("1")).toBeInTheDocument(); + }); + + it("warns about unresolved providers", () => { + const result = makeResponse({ + totalEligible: 0, + unresolvedProviders: ["plugin:typo"], + }); + renderWithProviders( + , + ); + expect(screen.getByText(/Unresolved providers/i)).toBeInTheDocument(); + expect(screen.getByText("plugin:typo")).toBeInTheDocument(); + }); +}); diff --git a/web/src/components/library/MetadataRefreshDryRunResult.tsx b/web/src/components/library/MetadataRefreshDryRunResult.tsx new file mode 100644 index 00000000..36c4276a --- /dev/null +++ b/web/src/components/library/MetadataRefreshDryRunResult.tsx @@ -0,0 +1,209 @@ +import { + Alert, + Badge, + Card, + Group, + Modal, + Paper, + ScrollArea, + SimpleGrid, + 
Stack, + Text, +} from "@mantine/core"; +import { IconAlertTriangle, IconLock } from "@tabler/icons-react"; +import type { + DryRunResponse, + DryRunSeriesDelta, + FieldChange, +} from "@/api/metadataRefresh"; + +interface MetadataRefreshDryRunResultProps { + opened: boolean; + onClose: () => void; + result: DryRunResponse | null; + loading?: boolean; +} + +/** + * Format a value for a field-change row: `field: before → after`. `before` and + * `after` are arbitrary JSON, so we stringify whatever we get for display. + */ +function fmtValue(value: unknown): string { + if (value === null || value === undefined) return "(empty)"; + if (typeof value === "string") return value.length > 0 ? value : "(empty)"; + try { + return JSON.stringify(value); + } catch { + return String(value); + } +} + +function ChangeRow({ change }: { change: FieldChange }) { + return ( + + + {change.field} + + + {fmtValue(change.before)} + + + → + + + {fmtValue(change.after)} + + + ); +} + +function DeltaCard({ delta }: { delta: DryRunSeriesDelta }) { + const hasChanges = delta.changes.length > 0; + const skipped = delta.skipped ?? []; + const hasOnlySkips = !hasChanges && skipped.length > 0; + + return ( + + + + + {delta.seriesTitle} + + + {delta.provider} + + + + {hasChanges && ( + + {delta.changes.map((c) => ( + + ))} + + )} + + {skipped.length > 0 && ( + + {skipped.map((s) => ( + + + + + {s.field} + + : {s.reason} + + + ))} + + )} + + {hasOnlySkips && ( + + Nothing would change for this series. + + )} + + {!hasChanges && skipped.length === 0 && ( + + No deltas reported. + + )} + + + ); +} + +export function MetadataRefreshDryRunResult({ + opened, + onClose, + result, + loading, +}: MetadataRefreshDryRunResultProps) { + return ( + + {loading ? ( + Computing preview… + ) : !result ? ( + No preview available yet. 
+ ) : ( + + + + + + Eligible (series × provider) + + {result.totalEligible} + + + + Skipped (no external ID) + + {result.estSkippedNoId} + + + + Skipped (recently synced) + + {result.estSkippedRecentlySynced} + + + + + {result.unresolvedProviders && + result.unresolvedProviders.length > 0 && ( + } + color="yellow" + variant="light" + title="Unresolved providers" + > + + These providers in your config don't match an enabled plugin: + + + {result.unresolvedProviders.map((p) => ( + + {p} + + ))} + + + )} + + {result.sample.length === 0 ? ( + + No series eligible for refresh under the current configuration. + Try widening the provider list or disabling "use existing source + IDs only". + + ) : ( + + + Showing {result.sample.length} of {result.totalEligible}{" "} + eligible (series × provider) pairs. + + {result.sample.map((delta) => ( + + ))} + + )} + + )} + + ); +} diff --git a/web/src/components/library/MetadataRefreshSettings.test.tsx b/web/src/components/library/MetadataRefreshSettings.test.tsx new file mode 100644 index 00000000..621b8973 --- /dev/null +++ b/web/src/components/library/MetadataRefreshSettings.test.tsx @@ -0,0 +1,185 @@ +import { HttpResponse, http } from "msw"; +import { setupServer } from "msw/node"; +import { + afterAll, + afterEach, + beforeAll, + beforeEach, + describe, + expect, + it, + vi, +} from "vitest"; +import { renderWithProviders, screen, userEvent, waitFor } from "@/test/utils"; +import { MetadataRefreshSettings } from "./MetadataRefreshSettings"; + +vi.mock("@mantine/notifications", () => ({ + notifications: { + show: vi.fn(), + }, +})); + +// Avoid pulling task-progress SSE machinery into tests. +vi.mock("@/hooks/useTaskProgress", () => ({ + useTaskProgress: () => ({ + activeTasks: [], + connectionState: "disconnected", + pendingCounts: {}, + getTask: () => undefined, + getTasksByStatus: () => [], + getTasksByLibrary: () => [], + }), +})); + +// Pin the scheduler timezone so the placeholder text is stable in tests. 
+vi.mock("@/hooks/useSchedulerTimezone", () => ({ + useSchedulerTimezone: () => "UTC", +})); + +const LIBRARY_ID = "11111111-1111-1111-1111-111111111111"; + +const baseConfig = { + enabled: true, + cronSchedule: "0 4 * * *", + timezone: null, + fieldGroups: ["ratings", "status"], + extraFields: [], + providers: ["plugin:mangabaka"], + existingSourceIdsOnly: true, + skipRecentlySyncedWithinS: 3600, + maxConcurrency: 4, +}; + +const fieldGroups = [ + { id: "ratings", label: "Ratings", fields: ["rating"] }, + { id: "status", label: "Status", fields: ["status", "year"] }, + { id: "counts", label: "Counts", fields: ["totalChapters"] }, +]; + +const pluginActions = { + scope: "series:bulk", + actions: [ + { + pluginId: "22222222-2222-2222-2222-222222222222", + pluginName: "mangabaka", + pluginDisplayName: "MangaBaka", + actionType: "metadata_search", + label: "Fetch from MangaBaka", + }, + ], +}; + +const server = setupServer( + http.get(`/api/v1/libraries/${LIBRARY_ID}/metadata-refresh`, () => + HttpResponse.json(baseConfig), + ), + http.get("/api/v1/metadata-refresh/field-groups", () => + HttpResponse.json(fieldGroups), + ), + http.get("/api/v1/plugins/actions", () => HttpResponse.json(pluginActions)), +); + +beforeAll(() => server.listen({ onUnhandledRequest: "bypass" })); +afterEach(() => server.resetHandlers()); +afterAll(() => server.close()); + +beforeEach(() => { + vi.clearAllMocks(); +}); + +describe("MetadataRefreshSettings", () => { + it("hydrates the form from the saved config", async () => { + renderWithProviders(); + + await waitFor(() => + expect(screen.getByLabelText(/Enable scheduled refresh/i)).toBeChecked(), + ); + + expect( + screen.getByLabelText(/Use existing source IDs only/i), + ).toBeChecked(); + // Concurrency hydrated to 4 + expect(screen.getByLabelText(/Max concurrency/i)).toHaveValue("4"); + }); + + it("PATCHes the config when Save is clicked", async () => { + let captured: unknown = null; + server.use( + http.patch( + 
`/api/v1/libraries/${LIBRARY_ID}/metadata-refresh`, + async ({ request }) => { + captured = await request.json(); + return HttpResponse.json({ ...baseConfig, enabled: false }); + }, + ), + ); + + renderWithProviders(); + + const enableCheckbox = await screen.findByLabelText( + /Enable scheduled refresh/i, + ); + const user = userEvent.setup(); + await user.click(enableCheckbox); + + const saveButton = screen.getByRole("button", { name: "Save" }); + await user.click(saveButton); + + await waitFor(() => expect(captured).not.toBeNull()); + expect((captured as { enabled: boolean }).enabled).toBe(false); + }); + + it("triggers run-now and shows the response", async () => { + const taskId = "33333333-3333-3333-3333-333333333333"; + let runNowCalled = false; + server.use( + http.post( + `/api/v1/libraries/${LIBRARY_ID}/metadata-refresh/run-now`, + () => { + runNowCalled = true; + return HttpResponse.json({ taskId }); + }, + ), + ); + + renderWithProviders(); + + const runNowButton = await screen.findByRole("button", { + name: /Run now/i, + }); + const user = userEvent.setup(); + await user.click(runNowButton); + + await waitFor(() => expect(runNowCalled).toBe(true)); + }); + + it("opens the dry-run modal with results", async () => { + server.use( + http.post( + `/api/v1/libraries/${LIBRARY_ID}/metadata-refresh/dry-run`, + () => + HttpResponse.json({ + sample: [], + totalEligible: 7, + estSkippedNoId: 0, + estSkippedRecentlySynced: 0, + }), + ), + ); + + renderWithProviders(); + + const previewButton = await screen.findByRole("button", { + name: /Preview changes/i, + }); + await waitFor(() => expect(previewButton).not.toBeDisabled()); + + const user = userEvent.setup(); + await user.click(previewButton); + + await waitFor(() => + expect(screen.getByText("Dry-run preview")).toBeInTheDocument(), + ); + await waitFor(() => expect(screen.getByText("7")).toBeInTheDocument()); + }); +}); diff --git a/web/src/components/library/MetadataRefreshSettings.tsx 
b/web/src/components/library/MetadataRefreshSettings.tsx new file mode 100644 index 00000000..6e017648 --- /dev/null +++ b/web/src/components/library/MetadataRefreshSettings.tsx @@ -0,0 +1,379 @@ +import { + Alert, + Anchor, + Badge, + Button, + Checkbox, + Group, + MultiSelect, + NumberInput, + Paper, + Select, + Stack, + Text, + TextInput, + Tooltip, +} from "@mantine/core"; +import { + IconInfoCircle, + IconPlayerPlay, + IconRefresh, +} from "@tabler/icons-react"; +import { useQuery } from "@tanstack/react-query"; +import { useEffect, useMemo, useState } from "react"; +import { type PluginActionsResponse, pluginsApi } from "@/api/plugins"; +import { + useDryRunMetadataRefresh, + useFieldGroups, + useMetadataRefreshConfig, + useRunMetadataRefreshNow, + useUpdateMetadataRefreshConfig, +} from "@/hooks/useLibraryMetadataRefresh"; +import { useSchedulerTimezone } from "@/hooks/useSchedulerTimezone"; +import { useTaskProgress } from "@/hooks/useTaskProgress"; +import { CronInput } from "../forms/CronInput"; +import { MetadataRefreshDryRunResult } from "./MetadataRefreshDryRunResult"; + +interface MetadataRefreshSettingsProps { + libraryId: string; +} + +const CRON_PRESETS: { value: string; label: string }[] = [ + { value: "0 * * * *", label: "Hourly (top of the hour)" }, + { value: "0 */6 * * *", label: "Every 6 hours" }, + { value: "0 4 * * *", label: "Daily at 04:00" }, + { value: "0 4 * * 0", label: "Weekly (Sunday 04:00)" }, + { value: "__custom__", label: "Custom cron expression…" }, +]; + +function presetForCron(cron: string): string { + return CRON_PRESETS.some((p) => p.value === cron && p.value !== "__custom__") + ? 
cron + : "__custom__"; +} + +export function MetadataRefreshSettings({ + libraryId, +}: MetadataRefreshSettingsProps) { + const schedulerTimezone = useSchedulerTimezone(); + const configQuery = useMetadataRefreshConfig(libraryId); + const fieldGroupsQuery = useFieldGroups(); + const updateMutation = useUpdateMetadataRefreshConfig(libraryId); + const runNowMutation = useRunMetadataRefreshNow(libraryId); + const dryRunMutation = useDryRunMetadataRefresh(libraryId); + + const pluginsQuery = useQuery({ + queryKey: ["plugins", "actions", "series:bulk", libraryId], + queryFn: () => pluginsApi.getActions("series:bulk", libraryId), + }); + + const { getTask } = useTaskProgress(); + + // Local form state (initialized from server, synced on save). + const [enabled, setEnabled] = useState(false); + const [cronPreset, setCronPreset] = useState("0 4 * * *"); + const [cronSchedule, setCronSchedule] = useState("0 4 * * *"); + const [timezone, setTimezone] = useState(""); + const [fieldGroups, setFieldGroups] = useState([ + "ratings", + "status", + "counts", + ]); + const [providers, setProviders] = useState([]); + const [existingSourceIdsOnly, setExistingSourceIdsOnly] = useState(true); + const [skipRecentlySyncedHours, setSkipRecentlySyncedHours] = + useState(1); + const [maxConcurrency, setMaxConcurrency] = useState(4); + + const [dryRunOpen, setDryRunOpen] = useState(false); + const [activeTaskId, setActiveTaskId] = useState(null); + + // Hydrate form when the saved config loads. + useEffect(() => { + const cfg = configQuery.data; + if (!cfg) return; + setEnabled(cfg.enabled); + setCronSchedule(cfg.cronSchedule); + setCronPreset(presetForCron(cfg.cronSchedule)); + setTimezone(cfg.timezone ?? ""); + setFieldGroups(cfg.fieldGroups); + setProviders(cfg.providers); + setExistingSourceIdsOnly(cfg.existingSourceIdsOnly); + setSkipRecentlySyncedHours( + Math.max(0, Math.round((cfg.skipRecentlySyncedWithinS ?? 
3600) / 3600)), + ); + setMaxConcurrency(cfg.maxConcurrency || 4); + }, [configQuery.data]); + + // Provider options: each entry's value is the wire format `"plugin:"`. + // Dedupe — the actions endpoint returns one row per (plugin, action) pair. + const providerOptions = useMemo(() => { + const seen = new Set(); + const out: { value: string; label: string }[] = []; + for (const action of pluginsQuery.data?.actions ?? []) { + const value = `plugin:${action.pluginName}`; + if (seen.has(value)) continue; + seen.add(value); + out.push({ value, label: action.pluginDisplayName || action.pluginName }); + } + return out; + }, [pluginsQuery.data]); + + const fieldGroupOptions = useMemo( + () => + (fieldGroupsQuery.data ?? []).map((g) => ({ + value: g.id, + label: g.label, + })), + [fieldGroupsQuery.data], + ); + + const fieldsByGroup = useMemo(() => { + const map = new Map(); + for (const g of fieldGroupsQuery.data ?? []) { + map.set(g.id, g.fields); + } + return map; + }, [fieldGroupsQuery.data]); + + const buildPatch = () => ({ + enabled, + cronSchedule, + timezone: timezone.trim() ? timezone.trim() : null, + fieldGroups, + providers, + existingSourceIdsOnly, + skipRecentlySyncedWithinS: Math.max(0, skipRecentlySyncedHours) * 3600, + maxConcurrency, + }); + + const handleSave = () => { + updateMutation.mutate(buildPatch()); + }; + + const handlePreview = () => { + setDryRunOpen(true); + dryRunMutation.mutate({ + configOverride: { + ...buildPatch(), + timezone: timezone.trim() ? timezone.trim() : null, + extraFields: configQuery.data?.extraFields ?? [], + }, + sampleSize: 5, + }); + }; + + const handleRunNow = () => { + runNowMutation.mutate(undefined, { + onSuccess: (response) => setActiveTaskId(response.taskId), + }); + }; + + const handleCronPresetChange = (value: string | null) => { + if (!value) return; + setCronPreset(value); + if (value !== "__custom__") { + setCronSchedule(value); + } + }; + + const activeTask = activeTaskId ? 
getTask(activeTaskId) : undefined; + const refreshTaskInFlight = + !!activeTask && + activeTask.status !== "completed" && + activeTask.status !== "failed"; + + if (configQuery.isLoading) { + return Loading schedule…; + } + if (configQuery.isError) { + return ( + + {(configQuery.error as Error)?.message ?? "Unknown error"} + + ); + } + + return ( + + } color="blue" variant="light"> + Configure scheduled metadata refreshes for this library. Field locks + always win: a locked field is never written, even when its group is + selected here. + + + + + Schedule + setEnabled(e.currentTarget.checked)} + /> + + {enabled && ( + <> + v && setCronPreset(v)} + /> + {cronPreset === "custom" && ( + + )} + setTimezone(e.currentTarget.value)} + /> + + + + - {cronPreset === "__custom__" && ( - - )} - - IANA timezone for the schedule. Leave empty to use the - server default ({schedulerTimezone}).{" "} - - View all timezones - - - } - placeholder={schedulerTimezone} - value={timezone} - onChange={(e) => setTimezone(e.currentTarget.value)} - /> - - )} - - - - - - What to refresh - { - const fields = fieldsByGroup.get(option.value) ?? []; - const tooltip = fields.length - ? `Includes: ${fields.join(", ")}` - : option.label; - return ( - - - {checked && } - {option.label} - - - ); - }} - /> - - - - {providers.length > 0 && ( - - - Per-provider field customization - - - Each provider inherits the library-wide field group selection - above. Expand a provider below to override its allowlist (for - example: AniList for ratings only, MangaBaka for status and - counts). - - - {providers.map((provider) => { - const expanded = expandedProviders[provider] ?? false; - const overridden = isOverridden(provider); - const override = perProviderOverrides[provider]; - return ( - - - - toggleExpanded(provider)} - aria-label={`Customize fields for ${providerLabel(provider)}`} - aria-expanded={expanded} - > - - {expanded ? ( - - ) : ( - - )} - {providerLabel(provider)} - {overridden ? 
( - - Custom - - ) : ( - - Inherits library - - )} - - - {overridden && ( - - )} - - - - - setOverride(provider, { - fieldGroups: value, - extraFields: override?.extraFields ?? [], - }) - } - comboboxProps={{ zIndex: 1001 }} - data-testid={`override-groups-${provider}`} - /> - - - - - ); - })} - - - )} - - - - - - Safety - setExistingSourceIdsOnly(e.currentTarget.checked)} - /> - - setSkipRecentlySyncedHours(typeof v === "number" ? v : 0) - } - /> - - setMaxConcurrency(typeof v === "number" && v > 0 ? v : 1) - } - /> - - - - {refreshTaskInFlight && activeTask && ( - }> - - - Refresh in progress - {activeTask.progress - ? ` — ${activeTask.progress.current} / ${activeTask.progress.total}` - : ""} - - - {activeTask.status} - - - - )} - - - - - - - - setDryRunOpen(false)} - result={dryRunMutation.data ?? null} - loading={dryRunMutation.isPending} - /> - - ); -} diff --git a/web/src/hooks/useLibraryJobs.ts b/web/src/hooks/useLibraryJobs.ts new file mode 100644 index 00000000..529d4038 --- /dev/null +++ b/web/src/hooks/useLibraryJobs.ts @@ -0,0 +1,150 @@ +import { notifications } from "@mantine/notifications"; +import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"; +import { + type CreateLibraryJobRequest, + type DryRunRequest, + type DryRunResponse, + type FieldGroup, + type LibraryJob, + libraryJobsApi, + type PatchLibraryJobInput, +} from "@/api/libraryJobs"; + +const listKey = (libraryId: string) => + ["library-jobs", libraryId, "list"] as const; +const detailKey = (libraryId: string, jobId: string) => + ["library-jobs", libraryId, jobId] as const; +const fieldGroupsKey = ["library-jobs", "field-groups"] as const; + +type ApiError = Error & { + response?: { data?: { error?: string; message?: string } }; +}; + +const errorText = (e: ApiError) => + e.response?.data?.message ?? e.response?.data?.error ?? 
e.message; + +export function useLibraryJobsList(libraryId: string) { + return useQuery({ + queryKey: listKey(libraryId), + queryFn: () => libraryJobsApi.list(libraryId), + enabled: Boolean(libraryId), + }); +} + +export function useLibraryJob(libraryId: string, jobId: string | undefined) { + return useQuery({ + queryKey: detailKey(libraryId, jobId ?? ""), + queryFn: () => libraryJobsApi.get(libraryId, jobId as string), + enabled: Boolean(libraryId && jobId), + }); +} + +export function useFieldGroups() { + return useQuery({ + queryKey: fieldGroupsKey, + queryFn: () => libraryJobsApi.fieldGroups(), + staleTime: Number.POSITIVE_INFINITY, + }); +} + +export function useCreateLibraryJob(libraryId: string) { + const qc = useQueryClient(); + return useMutation({ + mutationFn: (body: CreateLibraryJobRequest) => + libraryJobsApi.create(libraryId, body), + onSuccess: () => { + qc.invalidateQueries({ queryKey: listKey(libraryId) }); + notifications.show({ + title: "Job created", + message: "The new library job is ready.", + color: "green", + }); + }, + onError: (e: ApiError) => { + notifications.show({ + title: "Couldn't create job", + message: errorText(e), + color: "red", + }); + }, + }); +} + +export function useUpdateLibraryJob(libraryId: string) { + const qc = useQueryClient(); + return useMutation({ + mutationFn: (args: { jobId: string; patch: PatchLibraryJobInput }) => + libraryJobsApi.update(libraryId, args.jobId, args.patch), + onSuccess: (_, args) => { + qc.invalidateQueries({ queryKey: listKey(libraryId) }); + qc.invalidateQueries({ queryKey: detailKey(libraryId, args.jobId) }); + }, + onError: (e: ApiError) => { + notifications.show({ + title: "Couldn't save job", + message: errorText(e), + color: "red", + }); + }, + }); +} + +export function useDeleteLibraryJob(libraryId: string) { + const qc = useQueryClient(); + return useMutation({ + mutationFn: (jobId: string) => libraryJobsApi.delete(libraryId, jobId), + onSuccess: () => { + qc.invalidateQueries({ 
queryKey: listKey(libraryId) }); + notifications.show({ + title: "Job deleted", + message: "Library job removed.", + color: "blue", + }); + }, + onError: (e: ApiError) => { + notifications.show({ + title: "Couldn't delete job", + message: errorText(e), + color: "red", + }); + }, + }); +} + +export function useRunLibraryJobNow(libraryId: string) { + return useMutation({ + mutationFn: (jobId: string) => libraryJobsApi.runNow(libraryId, jobId), + onSuccess: (data) => { + notifications.show({ + title: "Job started", + message: `Task ${data.taskId.slice(0, 8)}… enqueued.`, + color: "blue", + }); + }, + onError: (e: ApiError) => { + notifications.show({ + title: "Couldn't start job", + message: errorText(e), + color: "red", + }); + }, + }); +} + +export function useDryRunLibraryJob(libraryId: string) { + return useMutation< + DryRunResponse, + ApiError, + { jobId: string; body?: DryRunRequest } + >({ + mutationFn: ({ jobId, body }) => + libraryJobsApi.dryRun(libraryId, jobId, body ?? {}), + onError: (e: ApiError) => { + notifications.show({ + title: "Preview failed", + message: errorText(e), + color: "red", + }); + }, + }); +} diff --git a/web/src/hooks/useLibraryMetadataRefresh.test.tsx b/web/src/hooks/useLibraryMetadataRefresh.test.tsx deleted file mode 100644 index 9320e795..00000000 --- a/web/src/hooks/useLibraryMetadataRefresh.test.tsx +++ /dev/null @@ -1,182 +0,0 @@ -import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; -import { act, renderHook, waitFor } from "@testing-library/react"; -import { HttpResponse, http } from "msw"; -import { setupServer } from "msw/node"; -import type { ReactNode } from "react"; -import { - afterAll, - afterEach, - beforeAll, - describe, - expect, - it, - vi, -} from "vitest"; -import { - useDryRunMetadataRefresh, - useFieldGroups, - useMetadataRefreshConfig, - useRunMetadataRefreshNow, - useUpdateMetadataRefreshConfig, -} from "./useLibraryMetadataRefresh"; - -vi.mock("@mantine/notifications", () => ({ - 
notifications: { - show: vi.fn(), - }, -})); - -const server = setupServer(); - -beforeAll(() => server.listen({ onUnhandledRequest: "bypass" })); -afterEach(() => server.resetHandlers()); -afterAll(() => server.close()); - -const LIBRARY_ID = "11111111-1111-1111-1111-111111111111"; - -const baseConfig = { - enabled: true, - cronSchedule: "0 4 * * *", - timezone: null, - fieldGroups: ["ratings"], - extraFields: [], - providers: ["plugin:mangabaka"], - existingSourceIdsOnly: true, - skipRecentlySyncedWithinS: 3600, - maxConcurrency: 4, -}; - -function createWrapper() { - const queryClient = new QueryClient({ - defaultOptions: { - queries: { retry: false, gcTime: 0 }, - mutations: { retry: false }, - }, - }); - return function Wrapper({ children }: { children: ReactNode }) { - return ( - {children} - ); - }; -} - -describe("useMetadataRefreshConfig", () => { - it("fetches the config for the given library", async () => { - server.use( - http.get(`/api/v1/libraries/${LIBRARY_ID}/metadata-refresh`, () => - HttpResponse.json(baseConfig), - ), - ); - - const { result } = renderHook(() => useMetadataRefreshConfig(LIBRARY_ID), { - wrapper: createWrapper(), - }); - - await waitFor(() => expect(result.current.isSuccess).toBe(true)); - expect(result.current.data).toEqual(baseConfig); - }); - - it("does not fetch when libraryId is missing", () => { - const { result } = renderHook(() => useMetadataRefreshConfig(undefined), { - wrapper: createWrapper(), - }); - expect(result.current.fetchStatus).toBe("idle"); - }); -}); - -describe("useFieldGroups", () => { - it("returns the catalog", async () => { - const groups = [ - { id: "ratings", label: "Ratings", fields: ["rating"] }, - { id: "status", label: "Status", fields: ["status"] }, - ]; - server.use( - http.get("/api/v1/metadata-refresh/field-groups", () => - HttpResponse.json(groups), - ), - ); - - const { result } = renderHook(() => useFieldGroups(), { - wrapper: createWrapper(), - }); - - await waitFor(() => 
expect(result.current.isSuccess).toBe(true)); - expect(result.current.data).toEqual(groups); - }); -}); - -describe("useUpdateMetadataRefreshConfig", () => { - it("PATCHes and updates the cache on success", async () => { - const updated = { ...baseConfig, fieldGroups: ["ratings", "status"] }; - server.use( - http.patch(`/api/v1/libraries/${LIBRARY_ID}/metadata-refresh`, () => - HttpResponse.json(updated), - ), - ); - - const { result } = renderHook( - () => useUpdateMetadataRefreshConfig(LIBRARY_ID), - { wrapper: createWrapper() }, - ); - - let response: typeof updated | undefined; - await act(async () => { - response = await result.current.mutateAsync({ - fieldGroups: ["ratings", "status"], - }); - }); - - expect(response).toEqual(updated); - await waitFor(() => expect(result.current.data).toEqual(updated)); - }); -}); - -describe("useRunMetadataRefreshNow", () => { - it("returns the task id from the run-now endpoint", async () => { - const taskId = "22222222-2222-2222-2222-222222222222"; - server.use( - http.post( - `/api/v1/libraries/${LIBRARY_ID}/metadata-refresh/run-now`, - () => HttpResponse.json({ taskId }), - ), - ); - - const { result } = renderHook(() => useRunMetadataRefreshNow(LIBRARY_ID), { - wrapper: createWrapper(), - }); - - await act(async () => { - const res = await result.current.mutateAsync(); - expect(res).toEqual({ taskId }); - }); - }); -}); - -describe("useDryRunMetadataRefresh", () => { - it("returns dry-run results", async () => { - const dryRun = { - sample: [], - totalEligible: 0, - estSkippedNoId: 0, - estSkippedRecentlySynced: 0, - }; - server.use( - http.post( - `/api/v1/libraries/${LIBRARY_ID}/metadata-refresh/dry-run`, - () => HttpResponse.json(dryRun), - ), - ); - - const { result } = renderHook(() => useDryRunMetadataRefresh(LIBRARY_ID), { - wrapper: createWrapper(), - }); - - await act(async () => { - const res = await result.current.mutateAsync({ - configOverride: null, - sampleSize: 5, - }); - expect(res).toEqual(dryRun); - }); 
- }); -}); diff --git a/web/src/hooks/useLibraryMetadataRefresh.ts b/web/src/hooks/useLibraryMetadataRefresh.ts deleted file mode 100644 index 68f6f770..00000000 --- a/web/src/hooks/useLibraryMetadataRefresh.ts +++ /dev/null @@ -1,114 +0,0 @@ -import { notifications } from "@mantine/notifications"; -import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"; -import { - type DryRunRequest, - type DryRunResponse, - type FieldGroup, - type MetadataRefreshConfig, - type MetadataRefreshConfigPatchInput, - metadataRefreshApi, - type RunNowResponse, -} from "@/api/metadataRefresh"; - -const configKey = (libraryId: string) => - ["library", libraryId, "metadata-refresh"] as const; -const fieldGroupsKey = ["metadata-refresh", "field-groups"] as const; - -/** Read the saved per-library scheduled refresh config. */ -export function useMetadataRefreshConfig(libraryId: string | undefined | null) { - return useQuery({ - queryKey: configKey(libraryId ?? ""), - queryFn: () => metadataRefreshApi.get(libraryId as string), - enabled: !!libraryId, - staleTime: 30_000, - }); -} - -/** Static-ish field-group catalog from the server (cached aggressively). */ -export function useFieldGroups() { - return useQuery({ - queryKey: [...fieldGroupsKey], - queryFn: () => metadataRefreshApi.listFieldGroups(), - staleTime: 60 * 60 * 1000, - gcTime: 24 * 60 * 60 * 1000, - }); -} - -/** Save a partial config update. */ -export function useUpdateMetadataRefreshConfig(libraryId: string) { - const queryClient = useQueryClient(); - - return useMutation< - MetadataRefreshConfig, - Error & { response?: { data?: { error?: string } } }, - MetadataRefreshConfigPatchInput - >({ - mutationFn: (patch) => metadataRefreshApi.update(libraryId, patch), - onSuccess: (data) => { - queryClient.setQueryData(configKey(libraryId), data); - notifications.show({ - title: "Schedule saved", - message: data.enabled - ? "Scheduled metadata refresh is enabled." 
- : "Scheduled metadata refresh is disabled.", - color: "green", - }); - }, - onError: (error) => { - notifications.show({ - title: "Could not save schedule", - message: - error.response?.data?.error || error.message || "Unknown error", - color: "red", - }); - }, - }); -} - -/** - * Trigger an immediate refresh task. The returned response carries the task - * ID; callers can pipe that into `useTaskProgress().getTask(id)` to render - * progress. - */ -export function useRunMetadataRefreshNow(libraryId: string) { - return useMutation< - RunNowResponse, - Error & { response?: { data?: { error?: string } } } - >({ - mutationFn: () => metadataRefreshApi.runNow(libraryId), - onSuccess: () => { - notifications.show({ - title: "Metadata refresh started", - message: "Tracking progress in the background.", - color: "blue", - }); - }, - onError: (error) => { - notifications.show({ - title: "Could not start refresh", - message: - error.response?.data?.error || error.message || "Unknown error", - color: "red", - }); - }, - }); -} - -/** Synchronous dry-run preview. 
*/ -export function useDryRunMetadataRefresh(libraryId: string) { - return useMutation< - DryRunResponse, - Error & { response?: { data?: { error?: string } } }, - DryRunRequest | undefined - >({ - mutationFn: (request) => metadataRefreshApi.dryRun(libraryId, request), - onError: (error) => { - notifications.show({ - title: "Preview failed", - message: - error.response?.data?.error || error.message || "Unknown error", - color: "red", - }); - }, - }); -} diff --git a/web/src/pages/LibraryJobs.tsx b/web/src/pages/LibraryJobs.tsx new file mode 100644 index 00000000..e5366c5b --- /dev/null +++ b/web/src/pages/LibraryJobs.tsx @@ -0,0 +1,138 @@ +import { + Anchor, + Breadcrumbs, + Button, + Center, + Container, + Group, + Loader, + Stack, + Text, + Title, +} from "@mantine/core"; +import { useDisclosure } from "@mantine/hooks"; +import { IconArrowLeft, IconPlus } from "@tabler/icons-react"; +import { useQuery } from "@tanstack/react-query"; +import { useState } from "react"; +import { useNavigate, useParams } from "react-router-dom"; +import { librariesApi } from "@/api/libraries"; +import type { LibraryJob } from "@/api/libraryJobs"; +import { JobList } from "@/components/library-jobs/LibraryJobsList"; +import { JobEditor } from "@/components/library-jobs/MetadataRefreshJobEditor"; +import { useDynamicDocumentTitle } from "@/hooks/useDocumentTitle"; +import { + useDeleteLibraryJob, + useLibraryJobsList, +} from "@/hooks/useLibraryJobs"; + +export function LibraryJobsPage() { + const { libraryId } = useParams<{ libraryId: string }>(); + const navigate = useNavigate(); + const [editorOpened, editor] = useDisclosure(false); + const [editingJob, setEditingJob] = useState(null); + + const library = useQuery({ + queryKey: ["library", libraryId], + queryFn: () => librariesApi.getById(libraryId as string), + enabled: Boolean(libraryId), + }); + + const jobsQuery = useLibraryJobsList(libraryId ?? ""); + const deleteMutation = useDeleteLibraryJob(libraryId ?? 
""); + + useDynamicDocumentTitle( + library.data ? `Scheduled Jobs · ${library.data.name}` : "Scheduled Jobs", + ); + + if (!libraryId) return null; + + if (library.isLoading) { + return ( + +
+ +
+
+ ); + } + + const handleAdd = () => { + setEditingJob(null); + editor.open(); + }; + + const handleEdit = (job: LibraryJob) => { + setEditingJob(job); + editor.open(); + }; + + const handleDelete = (job: LibraryJob) => { + if (!window.confirm(`Delete job "${job.name}"? This cannot be undone.`)) { + return; + } + deleteMutation.mutate(job.id); + }; + + return ( + + + + navigate("/libraries")} component="button"> + Libraries + + {library.data && ( + navigate(`/libraries/${library.data.id}`)} + component="button" + > + {library.data.name} + + )} + Scheduled Jobs + + + + + Scheduled Jobs + + Each job runs on its own cron, against one provider, and writes + the field groups you select. Multiple jobs per library are + supported. + + + + + + + + + + + + + + ); +} diff --git a/web/src/types/api.generated.ts b/web/src/types/api.generated.ts index 53e5c17a..c835d400 100644 --- a/web/src/types/api.generated.ts +++ b/web/src/types/api.generated.ts @@ -8622,6 +8622,15 @@ export interface components { */ voteCount?: number | null; }; + /** @description Request body for `POST /api/v1/libraries/{id}/jobs`. */ + CreateLibraryJobRequest: { + config: components["schemas"]["LibraryJobConfigDto"]; + cronSchedule: string; + enabled?: boolean; + /** @description Optional user-facing name. Auto-generated when missing or empty. */ + name?: string | null; + timezone?: string | null; + }; /** @description Create library request */ CreateLibraryRequest: { /** @@ -8952,6 +8961,10 @@ export interface components { /** @description Whether the dismissal was recorded */ dismissed: boolean; }; + DryRunFieldChange: { + after: unknown; + before: unknown; + }; /** * @description Dry-run preview attached to [`MetadataApplyResponse`] when the request * set `dryRun = true`. Absent on real applies. @@ -8959,65 +8972,51 @@ export interface components { DryRunReportDto: { changes: components["schemas"]["FieldChangeDto"][]; }; - /** - * @description Body for `POST /libraries/{id}/metadata-refresh/dry-run`. 
- * - * `configOverride` lets the UI preview a config that hasn't been saved yet — - * "what would happen if I clicked Save right now?". When absent, the saved - * config is used. - */ + /** @description Request body for `POST .../dry-run`. */ DryRunRequest: { - /** @default null */ - configOverride: null | components["schemas"]["MetadataRefreshConfigDto"]; + configOverride?: null | components["schemas"]["LibraryJobConfigDto"]; /** * Format: int32 - * @description Number of series to preview. Defaults to 5, capped at 20. - * @default null + * @description Sample size, capped at 20 server-side. */ - sampleSize: number | null; + sampleSize?: number | null; }; - /** @description Full dry-run response. */ DryRunResponse: { /** * Format: int32 - * @description Estimated `(series, provider)` pairs the planner skipped because the - * series has no stored external ID for the provider (strict mode only). + * @description Estimated count of series that would be skipped because they have no + * external ID for the chosen provider. */ estSkippedNoId: number; /** * Format: int32 - * @description Estimated pairs skipped because their `last_synced_at` is younger than - * the recency cutoff. + * @description Estimated count of series that would be skipped because they were + * recently synced. */ estSkippedRecentlySynced: number; - /** @description Per-series deltas, capped at the requested sample size. */ + /** @description Provider resolution failure reason, if any. */ + planFailure?: string | null; + /** @description Per-series deltas for the first N eligible series. */ sample: components["schemas"]["DryRunSeriesDelta"][]; /** * Format: int32 - * @description Total number of `(series, provider)` pairs the planner produced before - * the sample cap. + * @description Total number of series eligible to be refreshed (all of them, not + * just the sample). */ totalEligible: number; - /** - * @description Provider strings from the config that don't resolve to an enabled - * plugin. 
Surfaced verbatim so the UI can highlight typos or disabled - * plugins. - */ - unresolvedProviders?: string[]; }; - /** @description One series' would-be deltas in a dry-run preview. */ + /** @description One series's preview of would-be field changes. */ DryRunSeriesDelta: { - /** @description Fields that would be written. */ - changes: components["schemas"]["FieldChangeDto"][]; - /** @description Plugin id (`"plugin:"`) that produced this delta. */ - provider: string; + /** @description Field name → `(before, after)` JSON values. */ + changes: { + [key: string]: components["schemas"]["DryRunFieldChange"]; + }; /** Format: uuid */ seriesId: string; - seriesTitle: string; - /** @description Fields that would be skipped (locked, no permission, etc.). */ - skipped?: components["schemas"]["DryRunSkippedFieldDto"][]; + seriesName: string; + /** @description Fields that would have been written but were skipped (locks, all-locked, etc.) */ + skipped: components["schemas"]["DryRunSkippedFieldDto"][]; }; - /** @description A field skipped during a dry-run apply, with the reason. */ DryRunSkippedFieldDto: { field: string; reason: string; @@ -9537,16 +9536,10 @@ export interface components { before?: unknown; field: string; }; - /** @description One entry from `GET /api/v1/metadata-refresh/field-groups`. */ + /** @description Static field-group catalog row exposed for the editor UI. */ FieldGroupDto: { - /** - * @description camelCase field names this group expands into. Match the - * `should_apply_field` call sites in `MetadataApplier`. - */ fields: string[]; - /** @description Snake_case identifier stored in [`MetadataRefreshConfig::field_groups`]. */ id: string; - /** @description Human-readable label for UI display. */ label: string; }; /** @description Operators for string and equality comparisons */ @@ -11071,6 +11064,36 @@ export interface components { */ updatedAt: string; }; + /** + * @description Type-discriminated job config exposed over the wire. 
+ * + * Phase 9 only ships the `metadata_refresh` variant; future job types + * extend the enum. + */ + LibraryJobConfigDto: components["schemas"]["MetadataRefreshJobConfigDto"] & { + /** @enum {string} */ + type: "metadata_refresh"; + }; + /** @description Library job row exposed via GET / list / response. */ + LibraryJobDto: { + config: components["schemas"]["LibraryJobConfigDto"]; + /** Format: date-time */ + createdAt: string; + cronSchedule: string; + enabled: boolean; + /** Format: uuid */ + id: string; + /** Format: date-time */ + lastRunAt?: string | null; + lastRunMessage?: string | null; + lastRunStatus?: string | null; + /** Format: uuid */ + libraryId: string; + name: string; + timezone?: string | null; + /** Format: date-time */ + updatedAt: string; + }; /** @description Metrics for a single library */ LibraryMetricsDto: { /** @@ -11126,6 +11149,10 @@ export interface components { */ totalGroups: number; }; + /** @description Response for `GET /libraries/{id}/jobs`. */ + ListLibraryJobsResponse: { + jobs: components["schemas"]["LibraryJobDto"][]; + }; /** @description Query parameters for listing settings */ ListSettingsQuery: { /** @@ -11519,62 +11546,32 @@ export interface components { /** @description Summary counts */ summary: components["schemas"]["PreviewSummary"]; }; - /** - * @description Full read response for a library's scheduled metadata-refresh config. - * - * When the library has no stored config, the server returns - * [`MetadataRefreshConfig::default`] so clients always render something. 
- */ - MetadataRefreshConfigDto: { - cronSchedule: string; - enabled: boolean; - existingSourceIdsOnly: boolean; - extraFields: string[]; - fieldGroups: string[]; - /** Format: int32 */ - maxConcurrency: number; - perProviderOverrides?: { - [key: string]: components["schemas"]["ProviderOverrideDto"]; - } | null; - providers: string[]; - /** Format: int32 */ - skipRecentlySyncedWithinS: number; - timezone?: string | null; - }; - /** - * @description Partial PATCH body. Uses [`PatchValue`] for nullable fields so clients can - * distinguish "leave alone" from "explicit clear". - * - * All other fields use plain `Option` because clearing a non-nullable - * field doesn't make sense (e.g. you can't "unset" `enabled` — you can only - * flip it). - */ - MetadataRefreshConfigPatchDto: { - /** @default null */ - cronSchedule: string | null; - /** @default null */ - enabled: boolean | null; - /** @default null */ - existingSourceIdsOnly: boolean | null; - /** @default null */ - extraFields: string[] | null; - /** @default null */ - fieldGroups: string[] | null; + /** @description Wire shape for the metadata-refresh job config. */ + MetadataRefreshJobConfigDto: { + /** @description Reserved for the book-scope future work. */ + bookExtraFields?: string[]; + /** @description Reserved for the book-scope future work. */ + bookFieldGroups?: string[]; + /** @description When true, the planner skips series with no stored external ID. */ + existingSourceIdsOnly?: boolean; + /** @description Series-side individual field overrides (camelCase). */ + extraFields?: string[]; + /** @description Series-side field groups (snake_case identifiers). */ + fieldGroups?: string[]; /** * Format: int32 - * @default null + * @description Per-task fan-out; clamped at run time. */ - maxConcurrency: number | null; - perProviderOverrides?: Record | null; - /** @default null */ - providers: string[] | null; + maxConcurrency?: number; + /** @description Plugin reference, e.g. `"plugin:mangabaka"`. 
*/ + provider: string; + /** @description Refresh scope. Phase 9 only honours `series_only` at runtime. */ + scope?: components["schemas"]["RefreshScope"]; /** * Format: int32 - * @default null + * @description Skip series whose `last_synced_at` is younger than this many seconds. */ - skipRecentlySyncedWithinS: number | null; - /** @default null */ - timezone: string | null; + skipRecentlySyncedWithinS?: number; }; /** @description Response for cleanup operation */ MetricsCleanupResponse: { @@ -12915,6 +12912,20 @@ export interface components { */ title?: string | null; }; + /** + * @description Request body for `PATCH /api/v1/libraries/{id}/jobs/{job_id}`. + * + * All fields are optional. Top-level fields use [`PatchValue`] when their + * underlying type is `Option<...>` so an explicit `null` clears the value + * distinct from "not present". + */ + PatchLibraryJobRequest: { + config?: null | components["schemas"]["LibraryJobConfigDto"]; + cronSchedule?: string | null; + enabled?: boolean | null; + name?: string | null; + timezone?: string | null; + }; /** * @description PATCH request for partial update of series metadata * @@ -13070,6 +13081,7 @@ export interface components { PluginActionDto: { /** @description Action type (e.g., "metadata_search", "metadata_get") */ actionType: string; + capabilities?: null | components["schemas"]["PluginCapabilitiesDto"]; /** @description Description of the action */ description?: string | null; /** @description Icon hint for UI (optional) */ @@ -13743,14 +13755,6 @@ export interface components { /** @description Number of fields that will be applied */ willApply: number; }; - /** - * @description Per-provider override placeholder (Phase 8). Mirrors - * [`crate::services::metadata::ProviderOverride`] for the wire format. 
- */ - ProviderOverrideDto: { - extraFields?: string[]; - fieldGroups?: string[]; - }; /** * @description Public setting DTO (for non-admin users) * @@ -14076,6 +14080,14 @@ export interface components { /** @description Status of a running/pending background task ("pending" or "running"), if any */ taskStatus?: string | null; }; + /** + * @description Scope of a metadata refresh job. + * + * Phase 9 only honours [`RefreshScope::SeriesOnly`] at runtime. The + * other variants are schema-accepted but rejected by the validator. + * @enum {string} + */ + RefreshScope: "series_only" | "books_only" | "series_and_books"; /** @description Register request */ RegisterRequest: { /** @@ -14539,13 +14551,9 @@ export interface components { */ tasksEnqueued: number; }; - /** @description Response for `POST /libraries/{id}/metadata-refresh/run-now`. */ + /** @description Response for `POST .../run-now`. */ RunNowResponse: { - /** - * Format: uuid - * @description Background task ID. Subscribe to events on `/api/v1/events/stream` to - * follow progress. - */ + /** Format: uuid */ taskId: string; }; /** @description Scan status response */ @@ -16069,7 +16077,7 @@ export interface components { type: "refresh_metadata"; } | { /** Format: uuid */ - libraryId: string; + jobId: string; /** @enum {string} */ type: "refresh_library_metadata"; } | { From 308109e40cc4639a5d51c701f3e9e03e7049411d Mon Sep 17 00:00:00 2001 From: Sylvain Cau Date: Sun, 3 May 2026 17:43:50 -0700 Subject: [PATCH 11/12] ui(library-jobs): present recency guard in hours instead of seconds The "Skip if synced within" field on the metadata refresh job editor took a value in seconds, which forced users to think in seconds at the form level (3600, 21600, 86400) for what is conceptually an hours-scale threshold. The seconds presentation also made it harder to spot when the default 3600 (= 1h) was masking a "freshly synced, all skipped" outcome on a brand-new library. 
The wire format and persisted value are unchanged: skipRecentlySyncedWithinS still ships in seconds, the planner cutoff math is unchanged, and the default of 3600s is preserved. Only the input and label change: divide by 3600 on display, multiply by 3600 on save. --- .../components/library-jobs/MetadataRefreshJobEditor.tsx | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/web/src/components/library-jobs/MetadataRefreshJobEditor.tsx b/web/src/components/library-jobs/MetadataRefreshJobEditor.tsx index 5075e153..0a9cef0c 100644 --- a/web/src/components/library-jobs/MetadataRefreshJobEditor.tsx +++ b/web/src/components/library-jobs/MetadataRefreshJobEditor.tsx @@ -453,11 +453,14 @@ export function JobEditor({ libraryId, opened, onClose, job }: JobEditorProps) { onChange={(e) => setExistingOnly(e.currentTarget.checked)} /> setSkipRecent(typeof v === "number" ? v : 3600)} + value={skipRecent / 3600} + onChange={(v) => + setSkipRecent(typeof v === "number" ? Math.max(0, v) * 3600 : 3600) + } min={0} + step={1} /> Date: Sun, 3 May 2026 18:28:32 -0700 Subject: [PATCH 12/12] docs(seed): clarify credentials must be a YAML mapping, not a quoted JSON string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The seed config sample showed plugin credentials as a one-line flow mapping, which invites users to wrap it in quotes — turning it into a YAML string. The encryption layer then stores that raw string instead of an object, and the plugin receives garbage when it looks up `credentials.api_key`. Switch the example to block-mapping form and add a comment warning against quoting the value. 
--- config/seed-config.sample.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/config/seed-config.sample.yaml b/config/seed-config.sample.yaml index 30f55cf3..e63987ae 100644 --- a/config/seed-config.sample.yaml +++ b/config/seed-config.sample.yaml @@ -29,7 +29,9 @@ users: # For production, use npx: command: "npx", args: ["-y", "@ashdev/codex-plugin-"] # # credential_delivery options: "init_message" (default), "env", "both" -# credentials: JSON object with API keys/tokens (optional, can be set later via UI) +# credentials: YAML mapping of API keys/tokens (optional, can be set later via UI). +# Use a YAML mapping, NOT a quoted JSON string — quoting turns it into a string +# and the plugin will receive the raw text instead of an object. # # NOTE: If credentials are provided, CODEX_ENCRYPTION_KEY must be set. plugins: @@ -48,7 +50,8 @@ plugins: - "library:detail" - "library:scan" credential_delivery: init_message - # credentials: { "api_key": "your-mangabaka-api-key" } + # credentials: + # api_key: "your-mangabaka-api-key" # Echo - Test/debug plugin (no credentials needed) - name: metadata-echo