IndicoDataSolutions · mawelborn · Feb 3, 2026 · Jan 12, 2026 · Jan 12, 2026 · Jan 21, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,28 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and versions match the minimum IPA version required to use functionality.
 
 
+## [v7.2.3] - 2026-01-30
+
+### Added
+
+- Filter out deleted and retrieved submissions when polling in `AutoReviewPoller` and
+  `DownstreamPoller`. This makes submissions that can't be processed automatically drop
+  off (deleted submissions), and provides an API-based mechanism to force a
+  problematic submission to drop off Auto Review without failing it in the DB
+  (marking retrieved).
+- Normalize edge cases where Form Extraction bounding boxes are `float`s.
+- Support idiomatic `copy.deepcopy()` and `copy.replace()` of `Prediction`s and
+  `PredictionList`s via `__deepcopy__` and `__replace__`.
+
+### Changed
+
+- Simplify type annotations for `retry()` decorator.
+
+### Removed
+
+- `Prediction.copy()` in favor of `copy.deepcopy()` and `copy.replace()`.
+
+
 ## [v7.2.2] - 2025-10-14
 
 ### Added
@@ -293,7 +315,8 @@ This is the first major version release tested to work on Indico 6.X.
 - Row Association now also sorting on 'bbtop'.
 
 
-[v7.2.1]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v7.2.1...v7.2.2
+[v7.2.3]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v7.2.2...v7.2.3
+[v7.2.2]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v7.2.1...v7.2.2
 [v7.2.1]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v7.2.0...v7.2.1
 [v7.2.0]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v6.14.2...v7.2.0
 [v6.14.2]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v6.14.1...v6.14.2

diff --git a/indico_toolkit/__init__.py b/indico_toolkit/__init__.py
@@ -21,4 +21,4 @@
     "ToolkitStaggeredLoopError",
     "ToolkitStatusError",
 )
-__version__ = "7.2.2"
+__version__ = "7.2.3"
diff --git a/indico_toolkit/polling/queries.py b/indico_toolkit/polling/queries.py
@@ -11,7 +11,13 @@ class SubmissionIdsPendingAutoReview(GraphQLRequest):  # type: ignore[misc, no-a
     query SubmissionIdsPendingAutoReview($workflowIds: [Int]) {
         submissions(
             desc: false
-            filters: { status: PENDING_AUTO_REVIEW }
+            filters: {
+                AND: [
+                    { status: PENDING_AUTO_REVIEW }
+                    { filesDeleted: false }
+                    { retrieved: false }
+                ]
+            }
             limit: 1000
             orderBy: ID
             workflowIds: $workflowIds
@@ -39,13 +45,16 @@ class SubmissionIdsPendingDownstream(GraphQLRequest):  # type: ignore[misc, no-a
         submissions(
             desc: false
             filters: {
-                AND: {
-                    retrieved: false
-                    OR: [
-                        { status: COMPLETE }
-                        { status: FAILED }
-                    ]
-                }
+                AND: [
+                    {
+                        OR: [
+                            { status: COMPLETE }
+                            { status: FAILED }
+                        ]
+                    }
+                    { filesDeleted: false }
+                    { retrieved: false }
+                ]
             }
             limit: 1000
             orderBy: ID

diff --git a/indico_toolkit/results/normalization.py b/indico_toolkit/results/normalization.py
@@ -54,6 +54,13 @@ def normalize_prediction_dict(task_type: TaskType, prediction: "Any") -> None:
     ) and not has(prediction, list, "spans"):
         prediction["spans"] = []
 
+    # Form Extraction bounding boxes may very rarely have a `.0` decimal place,
+    # which causes them to fail strict validation.
+    if task_type == TaskType.FORM_EXTRACTION:
+        for edge in ("top", "left", "right", "bottom"):
+            if has(prediction, float, edge):
+                prediction[edge] = int(prediction[edge])
+
     # Form Extractions added in review may lack bounding box information.
     # These values will match `NULL_BOX`.
     if task_type == TaskType.FORM_EXTRACTION and not has(prediction, int, "top"):

diff --git a/indico_toolkit/results/predictions/__init__.py b/indico_toolkit/results/predictions/__init__.py
@@ -1,3 +1,4 @@
+import sys
 from typing import TYPE_CHECKING
 
 from ..normalization import normalize_prediction_dict
@@ -55,3 +56,14 @@ def from_dict(
         return Unbundling.from_dict(document, task, review, prediction)
     else:
         raise ValueError(f"unsupported task type {task.type!r}")
+
+
+# `dataclass()` doesn't (yet) provide a way to configure the generated `__replace__`
+# method on Python 3.13+. Unshadow `Prediction.__replace__` in generated subclasses.
+if sys.version_info >= (3, 13):
+    del Classification.__replace__
+    del DocumentExtraction.__replace__
+    del Extraction.__replace__
+    del FormExtraction.__replace__
+    del Summarization.__replace__
+    del Unbundling.__replace__
diff --git a/indico_toolkit/results/predictions/documentextraction.py b/indico_toolkit/results/predictions/documentextraction.py
@@ -1,5 +1,5 @@
-from copy import copy, deepcopy
-from dataclasses import dataclass, field, replace
+from copy import copy
+from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any
 
 from ...etloutput import (
@@ -131,6 +131,19 @@ def table_cells(self, table_cells: "Iterable[tuple[Table, Cell]]") -> None:
                 self.tables.append(table)
                 self.cells.append(cell)
 
+    def __deepcopy__(self, memo: Any) -> "Self":
+        """
+        Supports `copy.deepcopy(prediction)` without copying immutable objects.
+        This provides a significant time and memory improvement when OCR is assigned.
+        """
+        new_instance = super().__deepcopy__(memo)
+        new_instance.groups = copy(self.groups)
+        new_instance.spans = copy(self.spans)
+        new_instance.tokens = copy(self.tokens)
+        new_instance.tables = copy(self.tables)
+        new_instance.cells = copy(self.cells)
+        return new_instance
+
     @staticmethod
     def from_dict(
         document: "Document",
@@ -190,15 +203,3 @@ def to_dict(self) -> "dict[str, Any]":
             prediction["rejected"] = True
 
         return prediction
-
-    def copy(self) -> "Self":
-        return replace(
-            self,
-            groups=copy(self.groups),
-            spans=copy(self.spans),
-            tokens=copy(self.tokens),
-            tables=copy(self.tables),
-            cells=copy(self.cells),
-            confidences=copy(self.confidences),
-            extras=deepcopy(self.extras),
-        )
diff --git a/indico_toolkit/results/predictions/prediction.py b/indico_toolkit/results/predictions/prediction.py
@@ -1,5 +1,5 @@
 from copy import copy, deepcopy
-from dataclasses import dataclass, replace
+from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
@@ -28,15 +28,44 @@ def confidence(self) -> float:
     def confidence(self, value: float) -> None:
         self.confidences[self.label] = value
 
+    def __deepcopy__(self, memo: Any) -> "Self":
+        """
+        Supports `copy.deepcopy(prediction)` without copying immutable objects.
+        """
+        new_instance = copy(self)
+        new_instance.confidences = copy(self.confidences)
+        new_instance.extras = deepcopy(self.extras, memo)
+        return new_instance
+
+    def __replace__override__(self, **attributes: Any) -> "Self":
+        """
+        Supports `copy.replace(prediction, **attrs)` on Python 3.13+
+
+        Unlike `dataclasses.replace(**attrs)` this performs a deep copy and allows
+        assigning properties in addition to attributes.
+
+        E.g.
+        >>> dataclasses.replace(prediction, confidence=1.0)
+        Shallow copy and raises TypeError(...)
+        >>> copy.replace(prediction, confidence=1.0)
+        Deep copy and returns Prediction(confidence=1.0, ...)
+        """
+        new_instance = deepcopy(self)
+
+        for attribute, value in attributes.items():
+            setattr(new_instance, attribute, value)
+
+        return new_instance
+
     def to_dict(self) -> "dict[str, Any]":
         """
         Create a prediction dictionary for auto review changes.
         """
         raise NotImplementedError()
 
-    def copy(self) -> "Self":
-        return replace(
-            self,
-            confidences=copy(self.confidences),
-            extras=deepcopy(self.extras),
-        )
+
+# `dataclass()` doesn't (yet) provide a way to override the generated `__replace__`
+# method on Python 3.13+. It must be overridden after class generation and unshadowed
+# on all derived classes.
+Prediction.__replace__ = Prediction.__replace__override__  # type:ignore
+del Prediction.__replace__override__
diff --git a/indico_toolkit/results/predictions/summarization.py b/indico_toolkit/results/predictions/summarization.py
@@ -1,4 +1,4 @@
-from copy import copy, deepcopy
+from copy import copy
 from dataclasses import dataclass, replace
 from typing import TYPE_CHECKING, Any
 
@@ -70,6 +70,14 @@ def span(self, span: "Span") -> None:
         """
         self.citation = replace(self.citation, span=span)
 
+    def __deepcopy__(self, memo: Any) -> "Self":
+        """
+        Supports `copy.deepcopy(prediction)` without copying immutable objects.
+        """
+        new_instance = super().__deepcopy__(memo)
+        new_instance.citations = copy(self.citations)
+        return new_instance
+
     @staticmethod
     def from_dict(
         document: "Document",
@@ -125,11 +133,3 @@ def to_dict(self) -> "dict[str, Any]":
             prediction["rejected"] = True
 
         return prediction
-
-    def copy(self) -> "Self":
-        return replace(
-            self,
-            citations=copy(self.citations),
-            confidences=copy(self.confidences),
-            extras=deepcopy(self.extras),
-        )
diff --git a/indico_toolkit/results/predictions/unbundling.py b/indico_toolkit/results/predictions/unbundling.py
@@ -1,5 +1,5 @@
-from copy import copy, deepcopy
-from dataclasses import dataclass, replace
+from copy import copy
+from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
 
 from ...etloutput import Span
@@ -25,6 +25,14 @@ def pages(self) -> "tuple[int, ...]":
         """
         return tuple(span.page for span in self.spans)
 
+    def __deepcopy__(self, memo: Any) -> "Self":
+        """
+        Supports `copy.deepcopy(prediction)` without copying immutable objects.
+        """
+        new_instance = super().__deepcopy__(memo)
+        new_instance.spans = copy(self.spans)
+        return new_instance
+
     @staticmethod
     def from_dict(
         document: "Document",
@@ -55,11 +63,3 @@ def to_dict(self) -> "dict[str, Any]":
             "confidence": self.confidences,
             "spans": [span.to_dict() for span in self.spans],
         }
-
-    def copy(self) -> "Self":
-        return replace(
-            self,
-            spans=copy(self.spans),
-            confidences=copy(self.confidences),
-            extras=deepcopy(self.extras),
-        )
diff --git a/indico_toolkit/results/result.py b/indico_toolkit/results/result.py
@@ -1,6 +1,8 @@
-from dataclasses import dataclass
+from copy import deepcopy
+from dataclasses import dataclass, replace
 from functools import partial
 from itertools import chain
+from typing import TYPE_CHECKING, Any
 
 from . import predictions as prediction
 from .document import Document
@@ -11,6 +13,9 @@
 from .task import Task
 from .utils import get
 
+if TYPE_CHECKING:
+    from typing_extensions import Self
+
 
 @dataclass(frozen=True, order=True)
 class Result:
@@ -44,6 +49,12 @@ def admin_review(self) -> "PredictionList[Prediction]":
     def final(self) -> "PredictionList[Prediction]":
         return self.predictions.where(review=self.reviews[-1] if self.reviews else None)
 
+    def __deepcopy__(self, memo: Any) -> "Self":
+        """
+        Supports `copy.deepcopy(result)` without copying immutable objects.
+        """
+        return replace(self, predictions=deepcopy(self.predictions, memo))
+
     @staticmethod
     def from_dict(result: object) -> "Result":
         """

diff --git a/indico_toolkit/retry.py b/indico_toolkit/retry.py
@@ -10,8 +10,7 @@
     from typing import ParamSpec, TypeVar
 
     ArgumentsType = ParamSpec("ArgumentsType")
-    OuterReturnType = TypeVar("OuterReturnType")
-    InnerReturnType = TypeVar("InnerReturnType")
+    ReturnType = TypeVar("ReturnType")
 
 
 def retry(
@@ -20,7 +19,7 @@ def retry(
     wait: float = 1,
     backoff: float = 4,
     jitter: float = 0.5,
-) -> "Callable[[Callable[ArgumentsType, OuterReturnType]], Callable[ArgumentsType, OuterReturnType]]":  # noqa: E501
+) -> "Callable[[Callable[ArgumentsType, ReturnType]], Callable[ArgumentsType, ReturnType]]":  # noqa: E501
     """
     Decorate a function or coroutine to retry when it raises specified errors,
     apply exponential backoff and jitter to the wait time,
@@ -46,15 +45,15 @@ def wait_time(times_retried: int) -> float:
 
     @overload
     def retry_decorator(
-        decorated: "Callable[ArgumentsType, Awaitable[InnerReturnType]]",
-    ) -> "Callable[ArgumentsType, Awaitable[InnerReturnType]]": ...
+        decorated: "Callable[ArgumentsType, Awaitable[ReturnType]]",
+    ) -> "Callable[ArgumentsType, Awaitable[ReturnType]]": ...
     @overload
     def retry_decorator(
-        decorated: "Callable[ArgumentsType, InnerReturnType]",
-    ) -> "Callable[ArgumentsType, InnerReturnType]": ...
+        decorated: "Callable[ArgumentsType, ReturnType]",
+    ) -> "Callable[ArgumentsType, ReturnType]": ...
     def retry_decorator(
-        decorated: "Callable[ArgumentsType, InnerReturnType]",
-    ) -> "Callable[ArgumentsType, Awaitable[InnerReturnType]] | Callable[ArgumentsType, InnerReturnType]":  # noqa: E501
+        decorated: "Callable[ArgumentsType, ReturnType]",
+    ) -> "Callable[ArgumentsType, Awaitable[ReturnType]] | Callable[ArgumentsType, ReturnType]":  # noqa: E501
         """
         Decorate either a function or coroutine as appropriate.
         """
@@ -63,7 +62,7 @@ def retry_decorator(
             @wraps(decorated)
             async def retrying_coroutine(  # type: ignore[return]
                 *args: "ArgumentsType.args", **kwargs: "ArgumentsType.kwargs"
-            ) -> "InnerReturnType":
+            ) -> "ReturnType":
                 for times_retried in range(count + 1):
                     try:
                         return await decorated(*args, **kwargs)  # type: ignore[no-any-return]
@@ -79,7 +78,7 @@ async def retrying_coroutine(  # type: ignore[return]
             @wraps(decorated)
             def retrying_function(  # type: ignore[return]
                 *args: "ArgumentsType.args", **kwargs: "ArgumentsType.kwargs"
-            ) -> "InnerReturnType":
+            ) -> "ReturnType":
                 for times_retried in range(count + 1):
                     try:
                         return decorated(*args, **kwargs)