Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,28 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and versions match the minimum IPA version required to use functionality.


## [v7.2.3] - 2026-01-30

### Added

- Filter out deleted and retrieved submissions when polling in `AutoReviewPoller` and
`DownstreamPoller`. This makes submissions that can't be processed automatically drop
off (deleted submissions), and provides an API-based mechanism to force a
problematic submission to drop off Auto Review without failing it in the DB
(marking retrieved).
- Normalize edge cases where Form Extraction bounding boxes are `float`s.
- Support idiomatic `copy.deepcopy()` and `copy.replace()` of `Prediction`s and
`PredictionList`s via `__deepcopy__` and `__replace__`.

### Changed

- Simplify type annotations for `retry()` decorator.

### Removed

- `Prediction.copy()` in favor of `copy.deepcopy()` and `copy.replace()`.


## [v7.2.2] - 2025-10-14

### Added
Expand Down Expand Up @@ -293,7 +315,8 @@ This is the first major version release tested to work on Indico 6.X.
- Row Association now also sorting on 'bbtop'.


[v7.2.1]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v7.2.1...v7.2.2
[v7.2.3]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v7.2.2...v7.2.3
[v7.2.2]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v7.2.1...v7.2.2
[v7.2.1]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v7.2.0...v7.2.1
[v7.2.0]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v6.14.2...v7.2.0
[v6.14.2]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v6.14.1...v6.14.2
Expand Down
2 changes: 1 addition & 1 deletion indico_toolkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@
"ToolkitStaggeredLoopError",
"ToolkitStatusError",
)
__version__ = "7.2.2"
__version__ = "7.2.3"
25 changes: 17 additions & 8 deletions indico_toolkit/polling/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@ class SubmissionIdsPendingAutoReview(GraphQLRequest): # type: ignore[misc, no-a
query SubmissionIdsPendingAutoReview($workflowIds: [Int]) {
submissions(
desc: false
filters: { status: PENDING_AUTO_REVIEW }
filters: {
AND: [
{ status: PENDING_AUTO_REVIEW }
{ filesDeleted: false }
{ retrieved: false }
]
}
limit: 1000
orderBy: ID
workflowIds: $workflowIds
Expand Down Expand Up @@ -39,13 +45,16 @@ class SubmissionIdsPendingDownstream(GraphQLRequest): # type: ignore[misc, no-a
submissions(
desc: false
filters: {
AND: {
retrieved: false
OR: [
{ status: COMPLETE }
{ status: FAILED }
]
}
AND: [
{
OR: [
{ status: COMPLETE }
{ status: FAILED }
]
}
{ filesDeleted: false }
{ retrieved: false }
]
}
limit: 1000
orderBy: ID
Expand Down
7 changes: 7 additions & 0 deletions indico_toolkit/results/normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ def normalize_prediction_dict(task_type: TaskType, prediction: "Any") -> None:
) and not has(prediction, list, "spans"):
prediction["spans"] = []

# Form Extraction bounding boxes may very rarely have a `.0` decimal place,
# which causes them to fail strict validation.
if task_type == TaskType.FORM_EXTRACTION:
for edge in ("top", "left", "right", "bottom"):
if has(prediction, float, edge):
prediction[edge] = int(prediction[edge])

# Form Extractions added in review may lack bounding box information.
# These values will match `NULL_BOX`.
if task_type == TaskType.FORM_EXTRACTION and not has(prediction, int, "top"):
Expand Down
12 changes: 12 additions & 0 deletions indico_toolkit/results/predictions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
from typing import TYPE_CHECKING

from ..normalization import normalize_prediction_dict
Expand Down Expand Up @@ -55,3 +56,14 @@ def from_dict(
return Unbundling.from_dict(document, task, review, prediction)
else:
raise ValueError(f"unsupported task type {task.type!r}")


# On Python 3.13+ `dataclass()` generates a `__replace__` method on every class it
# decorates and doesn't (yet) provide a way to configure that. Delete the generated
# method from each prediction subclass so `Prediction.__replace__` is no longer shadowed.
if sys.version_info >= (3, 13):
    for _prediction_class in (
        Classification,
        DocumentExtraction,
        Extraction,
        FormExtraction,
        Summarization,
        Unbundling,
    ):
        del _prediction_class.__replace__

    # Don't leave the loop variable behind in the package namespace.
    del _prediction_class
29 changes: 15 additions & 14 deletions indico_toolkit/results/predictions/documentextraction.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from copy import copy, deepcopy
from dataclasses import dataclass, field, replace
from copy import copy
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any

from ...etloutput import (
Expand Down Expand Up @@ -131,6 +131,19 @@ def table_cells(self, table_cells: "Iterable[tuple[Table, Cell]]") -> None:
self.tables.append(table)
self.cells.append(cell)

def __deepcopy__(self, memo: Any) -> "Self":
    """
    Support `copy.deepcopy(prediction)`.

    The parent `__deepcopy__` builds the new instance. The new instance is then
    given its own shallow copies of `groups`, `spans`, `tokens`, `tables`, and
    `cells`; the objects inside those containers are shared rather than copied,
    which provides a significant time and memory improvement when OCR is assigned.
    """
    duplicate = super().__deepcopy__(memo)

    # Same attributes, same order as assigning them one by one.
    for name in ("groups", "spans", "tokens", "tables", "cells"):
        setattr(duplicate, name, copy(getattr(self, name)))

    return duplicate

@staticmethod
def from_dict(
document: "Document",
Expand Down Expand Up @@ -190,15 +203,3 @@ def to_dict(self) -> "dict[str, Any]":
prediction["rejected"] = True

return prediction

# Return a new instance built with `dataclasses.replace()`: `groups`, `spans`,
# `tokens`, `tables`, `cells`, and `confidences` are shallow-copied with `copy()`
# and `extras` is deep-copied with `deepcopy()`.
# NOTE(review): this looks like the removed side of the diff (the changelog lists
# `Prediction.copy()` as removed in favor of `copy.deepcopy()`) -- confirm.
def copy(self) -> "Self":
return replace(
self,
groups=copy(self.groups),
spans=copy(self.spans),
tokens=copy(self.tokens),
tables=copy(self.tables),
cells=copy(self.cells),
confidences=copy(self.confidences),
extras=deepcopy(self.extras),
)
43 changes: 36 additions & 7 deletions indico_toolkit/results/predictions/prediction.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from copy import copy, deepcopy
from dataclasses import dataclass, replace
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
Expand Down Expand Up @@ -28,15 +28,44 @@ def confidence(self) -> float:
def confidence(self, value: float) -> None:
self.confidences[self.label] = value

def __deepcopy__(self, memo: Any) -> "Self":
    """
    Support `copy.deepcopy(prediction)` without copying immutable objects.

    Starts from a shallow copy of the prediction, then replaces the two mutable
    containers on it: `confidences` gets a shallow copy and `extras` gets a deep
    copy that shares `memo` with the enclosing `deepcopy()` call.
    """
    duplicate = copy(self)
    duplicate.confidences, duplicate.extras = (
        copy(self.confidences),
        deepcopy(self.extras, memo),
    )
    return duplicate

def __replace__override__(self, **attributes: Any) -> "Self":
    """
    Support `copy.replace(prediction, **attrs)` on Python 3.13+.

    Differs from `dataclasses.replace()` in two ways: the returned object is a
    deep copy of this prediction, and each keyword is applied with `setattr()`,
    so properties can be assigned as well as plain attributes.

    For example, `dataclasses.replace(prediction, confidence=1.0)` makes a shallow
    copy and raises `TypeError`, whereas `copy.replace(prediction, confidence=1.0)`
    makes a deep copy and returns a prediction whose confidence is 1.0.
    """
    clone = deepcopy(self)

    for name in attributes:
        setattr(clone, name, attributes[name])

    return clone

def to_dict(self) -> "dict[str, Any]":
    """
    Create a prediction dictionary for auto review changes.

    This base implementation always raises `NotImplementedError`.
    """
    raise NotImplementedError

# Return a new instance built with `dataclasses.replace()`: `confidences` is
# shallow-copied with `copy()` and `extras` is deep-copied with `deepcopy()`.
# NOTE(review): this looks like the removed side of the diff (the changelog lists
# `Prediction.copy()` as removed in favor of `copy.deepcopy()`) -- confirm.
def copy(self) -> "Self":
return replace(
self,
confidences=copy(self.confidences),
extras=deepcopy(self.extras),
)

# `dataclass()` doesn't (yet) provide a way to override the generated `__replace__`
# method on Python 3.13+. It must be overridden after class generation and unshadowed
# on all derived classes.
# The assignment installs the hand-written implementation as `Prediction.__replace__`,
# and the `del` then removes the temporary `__replace__override__` name from the class.
Prediction.__replace__ = Prediction.__replace__override__ # type:ignore
del Prediction.__replace__override__
18 changes: 9 additions & 9 deletions indico_toolkit/results/predictions/summarization.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from copy import copy, deepcopy
from copy import copy
from dataclasses import dataclass, replace
from typing import TYPE_CHECKING, Any

Expand Down Expand Up @@ -70,6 +70,14 @@ def span(self, span: "Span") -> None:
"""
self.citation = replace(self.citation, span=span)

def __deepcopy__(self, memo: Any) -> "Self":
    """
    Support `copy.deepcopy(prediction)` without copying immutable objects.

    The parent `__deepcopy__` builds the new instance, which then receives its
    own shallow copy of `citations`.
    """
    duplicate = super().__deepcopy__(memo)
    setattr(duplicate, "citations", copy(self.citations))
    return duplicate

@staticmethod
def from_dict(
document: "Document",
Expand Down Expand Up @@ -125,11 +133,3 @@ def to_dict(self) -> "dict[str, Any]":
prediction["rejected"] = True

return prediction

# Return a new instance built with `dataclasses.replace()`: `citations` and
# `confidences` are shallow-copied with `copy()` and `extras` is deep-copied with
# `deepcopy()`.
# NOTE(review): this looks like the removed side of the diff (the changelog lists
# `Prediction.copy()` as removed in favor of `copy.deepcopy()`) -- confirm.
def copy(self) -> "Self":
return replace(
self,
citations=copy(self.citations),
confidences=copy(self.confidences),
extras=deepcopy(self.extras),
)
20 changes: 10 additions & 10 deletions indico_toolkit/results/predictions/unbundling.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from copy import copy, deepcopy
from dataclasses import dataclass, replace
from copy import copy
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

from ...etloutput import Span
Expand All @@ -25,6 +25,14 @@ def pages(self) -> "tuple[int, ...]":
"""
return tuple(span.page for span in self.spans)

def __deepcopy__(self, memo: Any) -> "Self":
    """
    Support `copy.deepcopy(prediction)` without copying immutable objects.

    The parent `__deepcopy__` builds the new instance, which then receives its
    own shallow copy of `spans`.
    """
    duplicate = super().__deepcopy__(memo)
    setattr(duplicate, "spans", copy(self.spans))
    return duplicate

@staticmethod
def from_dict(
document: "Document",
Expand Down Expand Up @@ -55,11 +63,3 @@ def to_dict(self) -> "dict[str, Any]":
"confidence": self.confidences,
"spans": [span.to_dict() for span in self.spans],
}

# Return a new instance built with `dataclasses.replace()`: `spans` and
# `confidences` are shallow-copied with `copy()` and `extras` is deep-copied with
# `deepcopy()`.
# NOTE(review): this looks like the removed side of the diff (the changelog lists
# `Prediction.copy()` as removed in favor of `copy.deepcopy()`) -- confirm.
def copy(self) -> "Self":
return replace(
self,
spans=copy(self.spans),
confidences=copy(self.confidences),
extras=deepcopy(self.extras),
)
13 changes: 12 additions & 1 deletion indico_toolkit/results/result.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from dataclasses import dataclass
from copy import deepcopy
from dataclasses import dataclass, replace
from functools import partial
from itertools import chain
from typing import TYPE_CHECKING, Any

from . import predictions as prediction
from .document import Document
Expand All @@ -11,6 +13,9 @@
from .task import Task
from .utils import get

if TYPE_CHECKING:
from typing_extensions import Self


@dataclass(frozen=True, order=True)
class Result:
Expand Down Expand Up @@ -44,6 +49,12 @@ def admin_review(self) -> "PredictionList[Prediction]":
def final(self) -> "PredictionList[Prediction]":
return self.predictions.where(review=self.reviews[-1] if self.reviews else None)

def __deepcopy__(self, memo: Any) -> "Self":
    """
    Support `copy.deepcopy(result)` without copying immutable objects.

    Only `predictions` is deep-copied, sharing `memo` with the enclosing
    `deepcopy()` call. Every other field is carried over to the new instance
    by `dataclasses.replace()`.
    """
    copied_predictions = deepcopy(self.predictions, memo)
    return replace(self, predictions=copied_predictions)

@staticmethod
def from_dict(result: object) -> "Result":
"""
Expand Down
21 changes: 10 additions & 11 deletions indico_toolkit/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from typing import ParamSpec, TypeVar

ArgumentsType = ParamSpec("ArgumentsType")
OuterReturnType = TypeVar("OuterReturnType")
InnerReturnType = TypeVar("InnerReturnType")
ReturnType = TypeVar("ReturnType")


def retry(
Expand All @@ -20,7 +19,7 @@ def retry(
wait: float = 1,
backoff: float = 4,
jitter: float = 0.5,
) -> "Callable[[Callable[ArgumentsType, OuterReturnType]], Callable[ArgumentsType, OuterReturnType]]": # noqa: E501
) -> "Callable[[Callable[ArgumentsType, ReturnType]], Callable[ArgumentsType, ReturnType]]": # noqa: E501
"""
Decorate a function or coroutine to retry when it raises specified errors,
apply exponential backoff and jitter to the wait time,
Expand All @@ -46,15 +45,15 @@ def wait_time(times_retried: int) -> float:

@overload
def retry_decorator(
decorated: "Callable[ArgumentsType, Awaitable[InnerReturnType]]",
) -> "Callable[ArgumentsType, Awaitable[InnerReturnType]]": ...
decorated: "Callable[ArgumentsType, Awaitable[ReturnType]]",
) -> "Callable[ArgumentsType, Awaitable[ReturnType]]": ...
@overload
def retry_decorator(
decorated: "Callable[ArgumentsType, InnerReturnType]",
) -> "Callable[ArgumentsType, InnerReturnType]": ...
decorated: "Callable[ArgumentsType, ReturnType]",
) -> "Callable[ArgumentsType, ReturnType]": ...
def retry_decorator(
decorated: "Callable[ArgumentsType, InnerReturnType]",
) -> "Callable[ArgumentsType, Awaitable[InnerReturnType]] | Callable[ArgumentsType, InnerReturnType]": # noqa: E501
decorated: "Callable[ArgumentsType, ReturnType]",
) -> "Callable[ArgumentsType, Awaitable[ReturnType]] | Callable[ArgumentsType, ReturnType]": # noqa: E501
"""
Decorate either a function or coroutine as appropriate.
"""
Expand All @@ -63,7 +62,7 @@ def retry_decorator(
@wraps(decorated)
async def retrying_coroutine( # type: ignore[return]
*args: "ArgumentsType.args", **kwargs: "ArgumentsType.kwargs"
) -> "InnerReturnType":
) -> "ReturnType":
for times_retried in range(count + 1):
try:
return await decorated(*args, **kwargs) # type: ignore[no-any-return]
Expand All @@ -79,7 +78,7 @@ async def retrying_coroutine( # type: ignore[return]
@wraps(decorated)
def retrying_function( # type: ignore[return]
*args: "ArgumentsType.args", **kwargs: "ArgumentsType.kwargs"
) -> "InnerReturnType":
) -> "ReturnType":
for times_retried in range(count + 1):
try:
return decorated(*args, **kwargs)
Expand Down
Loading
Loading