diff --git a/src/borgstore/backends/_base.py b/src/borgstore/backends/_base.py index de4ee68..e122586 100644 --- a/src/borgstore/backends/_base.py +++ b/src/borgstore/backends/_base.py @@ -96,7 +96,11 @@ def info(self, name) -> ItemInfo: @abstractmethod def load(self, name: str, *, size=None, offset=0) -> bytes: - """load value from """ + """load value from + + If offset is negative, it is counted from the end of the file. + If size is None, the whole object starting at offset is loaded. + """ @abstractmethod def store(self, name: str, value: bytes) -> None: diff --git a/src/borgstore/backends/_utils.py b/src/borgstore/backends/_utils.py new file mode 100644 index 0000000..5badd55 --- /dev/null +++ b/src/borgstore/backends/_utils.py @@ -0,0 +1,59 @@ +""" +Utilities for backend implementations. +""" + +from typing import Tuple, Optional + + +def make_range_header(offset: int, size: Optional[int] = None, total_size: Optional[int] = None) -> Optional[str]: + """ + Generate a standards compliant HTTP Range header. + + :param offset: offset in bytes. If negative, it is counted from the end of the file. + :param size: number of bytes to load. If None, load until the end of the file. + :param total_size: total size of the file. Required if offset < 0 and size is not None. + :return: Range header value (e.g., "bytes=0-99") or None if no Range header is needed. + """ + if offset < 0: + if size is None: + return f"bytes={offset}" + else: + if total_size is None: + raise ValueError("total_size is required for negative offset with a specific size") + start = total_size + offset + return f"bytes={start}-{start + size - 1}" + else: + if size is None: + return f"bytes={offset}-" if offset > 0 else None + else: + return f"bytes={offset}-{offset + size - 1}" + + +def parse_range_header(range_header: str) -> Tuple[int, Optional[int]]: + """ + Parse a standards compliant HTTP Range header. + Only supports "bytes" unit and single range specs. + + :param range_header: Range header value (e.g., "bytes=0-99", "bytes=100-", "bytes=-500"). + :return: A tuple (offset, size). offset is negative for suffix ranges. + """ + if not range_header or not range_header.startswith("bytes="): + return 0, None + + try: + range_val = range_header.split("=")[1] + if range_val.startswith("-"): + # bytes=-SUFFIX + return int(range_val), None + elif "-" in range_val: + # bytes=OFFSET- or bytes=OFFSET-END + start_str, end_str = range_val.split("-") + offset = int(start_str) + size = None + if end_str: + size = int(end_str) - offset + 1 + return offset, size + except (ValueError, IndexError): + pass + + return 0, None diff --git a/src/borgstore/backends/posixfs.py b/src/borgstore/backends/posixfs.py index eba050f..274bfbb 100644 --- a/src/borgstore/backends/posixfs.py +++ b/src/borgstore/backends/posixfs.py @@ -193,8 +193,8 @@ def load(self, name, *, size=None, offset=0): self._check_permission(name, "r") try: with path.open("rb") as f: - if offset > 0: - f.seek(offset) + if offset != 0: + f.seek(offset, os.SEEK_SET if offset >= 0 else os.SEEK_END) return f.read(-1 if size is None else size) except FileNotFoundError: raise ObjectNotFound(name) from None diff --git a/src/borgstore/backends/rclone.py b/src/borgstore/backends/rclone.py index 52f5aab..d87b5fa 100644 --- a/src/borgstore/backends/rclone.py +++ b/src/borgstore/backends/rclone.py @@ -17,6 +17,7 @@ requests = None from ._base import BackendBase, ItemInfo, validate_name +from ._utils import make_range_header from .errors import ( BackendError, BackendDoesNotExist, @@ -258,13 +259,23 @@ def load(self, name: str, *, size=None, offset=0) -> bytes: """Load value from .""" validate_name(name) headers = {} - if size is not None or offset > 0: - if size is not None: - headers["Range"] = f"bytes={offset}-{offset+size-1}" + if offset < 0 and size is not None: + if -offset - size <= 1024: + # Optimization: if the part of the tail we don't need is small, + # we just request the last N bytes and truncate locally. + range_header = make_range_header(offset, size=None) else: - headers["Range"] = f"bytes={offset}-" + info = self.info(name) + range_header = make_range_header(offset, size, info.size) + else: + range_header = make_range_header(offset, size) + if range_header: + headers["Range"] = range_header r = self._requests(requests.get, f"{self.url}[{self.fs}]/{name}", tries=self.TRIES, headers=headers) - return r.content + content = r.content + if offset < 0 and size is not None and size < len(content): + content = content[:size] + return content def store(self, name: str, value: bytes) -> None: """Store into .""" diff --git a/src/borgstore/backends/rest.py b/src/borgstore/backends/rest.py index 9a73fd0..6f5ce80 100644 --- a/src/borgstore/backends/rest.py +++ b/src/borgstore/backends/rest.py @@ -15,6 +15,7 @@ requests = HTTPBasicAuth = None from ._base import BackendBase, ItemInfo, validate_name +from ._utils import make_range_header from .errors import ( ObjectNotFound, BackendAlreadyExists, @@ -169,14 +170,27 @@ def load(self, name: str, *, size=None, offset=0) -> bytes: self._assert_open() validate_name(name) - r_hdr = (None if not offset else f"bytes={offset}-") if size is None else f"bytes={offset}-{offset + size - 1}" + if offset < 0 and size is not None: + if -offset - size <= 1024: + # Optimization: if the part of the tail we don't need is small, + # we just request the last N bytes and truncate locally. + range_header = make_range_header(offset, size=None) + else: + info = self.info(name) + range_header = make_range_header(offset, size, info.size) + else: + range_header = make_range_header(offset, size) + headers = self.headers.copy() - if r_hdr: - headers["Range"] = r_hdr + if range_header: + headers["Range"] = range_header response = self._request("get", self._url(name), headers=headers) self._handle_response(response, name) - return response.content + content = response.content + if offset < 0 and size is not None and size < len(content): + content = content[:size] + return content def store(self, name: str, value: bytes) -> None: self._assert_open() diff --git a/src/borgstore/backends/s3.py b/src/borgstore/backends/s3.py index df98dff..e026bfd 100644 --- a/src/borgstore/backends/s3.py +++ b/src/borgstore/backends/s3.py @@ -13,6 +13,7 @@ import urllib.parse from ._base import BackendBase, ItemInfo, validate_name +from ._utils import make_range_header from .errors import BackendError, BackendMustBeOpen, BackendMustNotBeOpen, BackendDoesNotExist, BackendAlreadyExists from .errors import ObjectNotFound @@ -187,20 +188,25 @@ def load(self, name, *, size=None, offset=0): validate_name(name) key = self.base_path + name try: - if size is None and offset == 0: + if offset < 0 and size is not None: + if -offset - size <= 1024: + # Optimization: if the part of the tail we don't need is small, + # we just request the last N bytes and truncate locally. + range_header = make_range_header(offset, size=None) + else: + info = self.info(name) + range_header = make_range_header(offset, size, info.size) + else: + range_header = make_range_header(offset, size) + + if range_header: + obj = self.s3.get_object(Bucket=self.bucket, Key=key, Range=range_header) + else: obj = self.s3.get_object(Bucket=self.bucket, Key=key) - return obj["Body"].read() - elif size is not None and offset == 0: - obj = self.s3.get_object(Bucket=self.bucket, Key=key, Range=f"bytes=0-{size - 1}") - return obj["Body"].read() - elif size is None and offset != 0: - head = self.s3.head_object(Bucket=self.bucket, Key=key) - length = head["ContentLength"] - obj = self.s3.get_object(Bucket=self.bucket, Key=key, Range=f"bytes={offset}-{length - 1}") - return obj["Body"].read() - elif size is not None and offset != 0: - obj = self.s3.get_object(Bucket=self.bucket, Key=key, Range=f"bytes={offset}-{offset + size - 1}") - return obj["Body"].read() + content = obj["Body"].read() + if offset < 0 and size is not None and size < len(content): + content = content[:size] + return content except self.s3.exceptions.NoSuchKey: raise ObjectNotFound(name) diff --git a/src/borgstore/backends/sftp.py b/src/borgstore/backends/sftp.py index aadbf42..3add575 100644 --- a/src/borgstore/backends/sftp.py +++ b/src/borgstore/backends/sftp.py @@ -238,7 +238,7 @@ def load(self, name, *, size=None, offset=0): validate_name(name) try: with self.client.open(name) as f: - f.seek(offset) + f.seek(offset, 0 if offset >= 0 else 2) f.prefetch(size) # speeds up the following read() significantly! return f.read(size) except FileNotFoundError: diff --git a/src/borgstore/server/rest.py b/src/borgstore/server/rest.py index 3eb8ff5..fa14a62 100644 --- a/src/borgstore/server/rest.py +++ b/src/borgstore/server/rest.py @@ -15,6 +15,7 @@ BackendMustBeOpen, BackendMustNotBeOpen, ) +from ..backends._utils import parse_range_header from ..store import get_backend logger = logging.getLogger(__name__) @@ -251,18 +252,7 @@ def do_GET(self): try: range_header = self.headers.get("Range") - offset = 0 - size = None - if range_header and range_header.startswith("bytes="): - # Simple Range: bytes=OFFSET- or bytes=OFFSET-END - try: - range_val = range_header.split("=")[1] - start_str, end_str = range_val.split("-") - offset = int(start_str) - if end_str: - size = int(end_str) - offset + 1 - except ValueError: - pass + offset, size = parse_range_header(range_header) if range_header else (0, None) with self.server.backend: data = self.server.backend.load(self.name, offset=offset, size=size) diff --git a/tests/test_backends.py b/tests/test_backends.py index 221601e..93c5625 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -459,6 +459,20 @@ def test_load_partial(tested_backends, request): assert backend.load("key", size=3) == b"012" assert backend.load("key", offset=5) == b"56789" assert backend.load("key", offset=4, size=4) == b"4567" + assert backend.load("key", offset=-3) == b"789" + assert backend.load("key", offset=-4, size=2) == b"67" + + # test tail loading optimization (-offset - size <= 1024) + # offset=-10, size=9 -> -offset - size = 1 <= 1024 -> optimized + assert backend.load("key", offset=-10, size=9) == b"012345678" + # offset=-10, size=1 -> -offset - size = 9 <= 1024 -> optimized + assert backend.load("key", offset=-10, size=1) == b"0" + # offset=-2000, size=1000 -> -offset - size = 1000 <= 1024 -> optimized + backend.store("large", b"x" * 2000) + assert backend.load("large", offset=-2000, size=1000) == b"x" * 1000 + # offset=-3000, size=1000 -> -offset - size = 2000 > 1024 -> NOT optimized + backend.store("huge", b"y" * 3000) + assert backend.load("huge", offset=-3000, size=1000) == b"y" * 1000 def test_already_exists(tested_backends, request): diff --git a/tests/test_backends_utils.py b/tests/test_backends_utils.py new file mode 100644 index 0000000..b989961 --- /dev/null +++ b/tests/test_backends_utils.py @@ -0,0 +1,30 @@ +from borgstore.backends._utils import make_range_header, parse_range_header +import pytest + + +def test_make_range_header(): + # From start + assert make_range_header(0) is None + assert make_range_header(100) == "bytes=100-" + assert make_range_header(100, 50) == "bytes=100-149" + assert make_range_header(0, 50) == "bytes=0-49" + + # From end + assert make_range_header(-100) == "bytes=-100" + assert make_range_header(-100, 50, 1000) == "bytes=900-949" + + with pytest.raises(ValueError): + make_range_header(-100, 50) + + +def test_parse_range_header(): + assert parse_range_header(None) == (0, None) + assert parse_range_header("") == (0, None) + assert parse_range_header("invalid") == (0, None) + assert parse_range_header("bytes=invalid") == (0, None) + + assert parse_range_header("bytes=100-") == (100, None) + assert parse_range_header("bytes=100-149") == (100, 50) + assert parse_range_header("bytes=0-49") == (0, 50) + + assert parse_range_header("bytes=-100") == (-100, None)