From b0f30969899459b3d1f8c7063261eb6779734b03 Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Tue, 19 May 2026 04:49:26 +0530 Subject: [PATCH] legacy: move UpgraderFrom12To20 to borg.legacy, refs #9556 --- src/borg/archiver/transfer_cmd.py | 7 +- src/borg/legacy/upgrade.py | 141 ++++++++++++++++++++++++++++++ src/borg/upgrade.py | 138 ----------------------------- 3 files changed, 146 insertions(+), 140 deletions(-) create mode 100644 src/borg/legacy/upgrade.py diff --git a/src/borg/archiver/transfer_cmd.py b/src/borg/archiver/transfer_cmd.py index e37ddf57b2..f6c723f7f6 100644 --- a/src/borg/archiver/transfer_cmd.py +++ b/src/borg/archiver/transfer_cmd.py @@ -172,6 +172,7 @@ def do_transfer(self, args, *, repository, manifest, cache, other_repository=Non raise Error("\n".join(ac_errors)) from .. import upgrade as upgrade_mod + from ..legacy import upgrade as legacy_upgrade_mod v1_or_v2 = getattr(args, "v1_or_v2", False) upgrader = args.upgrader @@ -179,11 +180,13 @@ def do_transfer(self, args, *, repository, manifest, cache, other_repository=Non upgrader = "From12To20" try: - UpgraderCls = getattr(upgrade_mod, f"Upgrader{upgrader}") + UpgraderCls = getattr(upgrade_mod, f"Upgrader{upgrader}", None) or getattr( + legacy_upgrade_mod, f"Upgrader{upgrader}" + ) except AttributeError: raise Error(f"No such upgrader: {upgrader}") - if UpgraderCls is not upgrade_mod.UpgraderFrom12To20 and other_manifest.repository.version == 1: + if UpgraderCls is not legacy_upgrade_mod.UpgraderFrom12To20 and other_manifest.repository.version == 1: raise Error("To transfer from a borg 1.x repo, you need to use: --upgrader=From12To20") upgrader = UpgraderCls(cache=cache, args=args) diff --git a/src/borg/legacy/upgrade.py b/src/borg/legacy/upgrade.py new file mode 100644 index 0000000000..5ebd91eada --- /dev/null +++ b/src/borg/legacy/upgrade.py @@ -0,0 +1,141 @@ +from struct import Struct +from types import NoneType + +from ..constants import REQUIRED_ITEM_KEYS, CH_BUZHASH +from ..compress import ZLIB, ZLIB_legacy, ObfuscateSize +from ..helpers import HardLinkManager, join_cmd +from ..item import Item +from ..logger import create_logger + +logger = create_logger(__name__) + + +class UpgraderFrom12To20: + borg1_header_fmt = Struct(">I") + + def __init__(self, *, cache, args): + self.cache = cache + self.args = args + + def new_archive(self, *, archive): + self.archive = archive + # hlid -> chunks_correct list (or None, for contentless hardlinks) + self.hlm = HardLinkManager(id_type=bytes, info_type=(list, NoneType)) + + def upgrade_item(self, *, item): + """Upgrades the item as needed and removes legacy data.""" + ITEM_KEY_WHITELIST = { + "path", + "rdev", + "chunks", + "hlid", + "mode", + "user", + "group", + "uid", + "gid", + "mtime", + "atime", + "ctime", + "birthtime", + "size", + "xattrs", + "bsdflags", + "acl_nfs4", + "acl_access", + "acl_default", + "acl_extended", + } + + if self.hlm.borg1_hardlink_master(item): + item.hlid = hlid = self.hlm.hardlink_id_from_path(item.path) + self.hlm.remember(id=hlid, info=item.get("chunks")) + elif self.hlm.borg1_hardlink_slave(item): + item.hlid = hlid = self.hlm.hardlink_id_from_path(item.source) + chunks = self.hlm.retrieve(id=hlid) + if chunks is not None: + item.chunks = chunks + for chunk_id, chunk_size in chunks: + self.cache.reuse_chunk(chunk_id, chunk_size, self.archive.stats) + del item.source # not used for hard links anymore, replaced by hlid + # make sure we only have desired stuff in the new item. specifically, make sure to get rid of: + # - 'acl' remnants of bug in attic <= 0.13 + # - 'hardlink_master' (superseded by hlid) + item_dict = item.as_dict() + new_item_dict = {key: value for key, value in item_dict.items() if key in ITEM_KEY_WHITELIST} + # symlink targets were .source for borg1, but borg2 uses .target: + if "source" in item_dict: + new_item_dict["target"] = item_dict["source"] + assert "source" not in new_item_dict + # remove some pointless entries older borg put in there: + for key in "user", "group": + if key in new_item_dict and new_item_dict[key] is None: + del new_item_dict[key] + assert not any(value is None for value in new_item_dict.values()), f"found None value in {new_item_dict}" + new_item = Item(internal_dict=new_item_dict) + new_item.get_size(memorize=True) # if not already present: compute+remember size for items with chunks + assert all(key in new_item for key in REQUIRED_ITEM_KEYS) + return new_item + + def upgrade_compressed_chunk(self, meta, data): + # meta/data was parsed via RepoObj1.parse, which returns data **including** the ctype/clevel bytes prefixed + def upgrade_zlib_and_level(meta, data): + if ZLIB_legacy.detect(data): + ctype = ZLIB.ID + data = bytes(data) # ZLIB_legacy has no ctype/clevel prefix + else: + ctype = data[0] + data = bytes(data[2:]) # strip ctype/clevel bytes + meta["ctype"] = ctype + meta["clevel"] = level + meta["csize"] = len(data) # we may have stripped some prefixed ctype/clevel bytes + return meta, data + + ctype = data[0] + level = 0xFF # means unknown compression level + + if ctype == ObfuscateSize.ID: + # in older borg, we used unusual byte order + hlen = self.borg1_header_fmt.size + csize_bytes = data[2 : 2 + hlen] + csize = self.borg1_header_fmt.unpack(csize_bytes)[0] + compressed = data[2 + hlen : 2 + hlen + csize] + meta, compressed = upgrade_zlib_and_level(meta, compressed) + meta["psize"] = csize + osize = len(data) - 2 - hlen - csize # amount of 0x00 bytes appended for obfuscation + data = compressed + bytes(osize) + meta["csize"] = len(data) + else: + meta, data = upgrade_zlib_and_level(meta, data) + return meta, data + + def upgrade_archive_metadata(self, *, metadata): + new_metadata = {} + # keep all metadata except archive version and stats. also do not keep + # recreate_source_id, recreate_args, recreate_partial_chunks which were used only in 1.1.0b1 .. b2. + for attr in ("hostname", "username", "comment", "chunker_params"): + if hasattr(metadata, attr): + new_metadata[attr] = getattr(metadata, attr) + # if cwd is None, we want to drop it from metadata, so we set it to None here, and save() will drop it. + new_metadata["cwd"] = getattr(metadata, "cwd", None) + rechunking = self.args.chunker_params is not None + if rechunking: + # if we are rechunking while transferring, we take the new chunker_params. + new_metadata["chunker_params"] = self.args.chunker_params + else: + if chunker_params := new_metadata.get("chunker_params"): + if len(chunker_params) == 4 and isinstance(chunker_params[0], int): + # this is a borg < 1.2 chunker_params tuple, no chunker algo specified, but we only had buzhash: + new_metadata["chunker_params"] = (CH_BUZHASH,) + chunker_params + # old borg used UTC timestamps, but did not have the explicit tz offset in them. + # the only important timestamp is "time", which has the nominal timestamp of the archive. + if hasattr(metadata, "time"): + new_metadata["time"] = getattr(metadata, "time") + "+00:00" + # borg 1: cmdline, recreate_cmdline: a copy of sys.argv + # borg 2: command_line, recreate_command_line: a single string + if hasattr(metadata, "cmdline"): + new_metadata["command_line"] = join_cmd(getattr(metadata, "cmdline")) + if hasattr(metadata, "recreate_cmdline"): + new_metadata["recreate_command_line"] = join_cmd(getattr(metadata, "recreate_cmdline")) + new_metadata["tags"] = [] + return new_metadata diff --git a/src/borg/upgrade.py b/src/borg/upgrade.py index 2c562b4691..e8ec00da90 100644 --- a/src/borg/upgrade.py +++ b/src/borg/upgrade.py @@ -1,10 +1,3 @@ -from struct import Struct -from types import NoneType - -from .constants import REQUIRED_ITEM_KEYS, CH_BUZHASH -from .compress import ZLIB, ZLIB_legacy, ObfuscateSize -from .helpers import HardLinkManager, join_cmd -from .item import Item from .logger import create_logger logger = create_logger(__name__) @@ -45,134 +38,3 @@ def upgrade_archive_metadata(self, *, metadata): # if we are rechunking while transferring, we take the new chunker_params. new_metadata["chunker_params"] = self.args.chunker_params return new_metadata - - -class UpgraderFrom12To20: - borg1_header_fmt = Struct(">I") - - def __init__(self, *, cache, args): - self.cache = cache - self.args = args - - def new_archive(self, *, archive): - self.archive = archive - # hlid -> chunks_correct list (or None, for contentless hardlinks) - self.hlm = HardLinkManager(id_type=bytes, info_type=(list, NoneType)) - - def upgrade_item(self, *, item): - """Upgrades the item as needed and removes legacy data.""" - ITEM_KEY_WHITELIST = { - "path", - "rdev", - "chunks", - "hlid", - "mode", - "user", - "group", - "uid", - "gid", - "mtime", - "atime", - "ctime", - "birthtime", - "size", - "xattrs", - "bsdflags", - "acl_nfs4", - "acl_access", - "acl_default", - "acl_extended", - } - - if self.hlm.borg1_hardlink_master(item): - item.hlid = hlid = self.hlm.hardlink_id_from_path(item.path) - self.hlm.remember(id=hlid, info=item.get("chunks")) - elif self.hlm.borg1_hardlink_slave(item): - item.hlid = hlid = self.hlm.hardlink_id_from_path(item.source) - chunks = self.hlm.retrieve(id=hlid) - if chunks is not None: - item.chunks = chunks - for chunk_id, chunk_size in chunks: - self.cache.reuse_chunk(chunk_id, chunk_size, self.archive.stats) - del item.source # not used for hard links anymore, replaced by hlid - # make sure we only have desired stuff in the new item. specifically, make sure to get rid of: - # - 'acl' remnants of bug in attic <= 0.13 - # - 'hardlink_master' (superseded by hlid) - item_dict = item.as_dict() - new_item_dict = {key: value for key, value in item_dict.items() if key in ITEM_KEY_WHITELIST} - # symlink targets were .source for borg1, but borg2 uses .target: - if "source" in item_dict: - new_item_dict["target"] = item_dict["source"] - assert "source" not in new_item_dict - # remove some pointless entries older borg put in there: - for key in "user", "group": - if key in new_item_dict and new_item_dict[key] is None: - del new_item_dict[key] - assert not any(value is None for value in new_item_dict.values()), f"found None value in {new_item_dict}" - new_item = Item(internal_dict=new_item_dict) - new_item.get_size(memorize=True) # if not already present: compute+remember size for items with chunks - assert all(key in new_item for key in REQUIRED_ITEM_KEYS) - return new_item - - def upgrade_compressed_chunk(self, meta, data): - # meta/data was parsed via RepoObj1.parse, which returns data **including** the ctype/clevel bytes prefixed - def upgrade_zlib_and_level(meta, data): - if ZLIB_legacy.detect(data): - ctype = ZLIB.ID - data = bytes(data) # ZLIB_legacy has no ctype/clevel prefix - else: - ctype = data[0] - data = bytes(data[2:]) # strip ctype/clevel bytes - meta["ctype"] = ctype - meta["clevel"] = level - meta["csize"] = len(data) # we may have stripped some prefixed ctype/clevel bytes - return meta, data - - ctype = data[0] - level = 0xFF # means unknown compression level - - if ctype == ObfuscateSize.ID: - # in older borg, we used unusual byte order - hlen = self.borg1_header_fmt.size - csize_bytes = data[2 : 2 + hlen] - csize = self.borg1_header_fmt.unpack(csize_bytes)[0] - compressed = data[2 + hlen : 2 + hlen + csize] - meta, compressed = upgrade_zlib_and_level(meta, compressed) - meta["psize"] = csize - osize = len(data) - 2 - hlen - csize # amount of 0x00 bytes appended for obfuscation - data = compressed + bytes(osize) - meta["csize"] = len(data) - else: - meta, data = upgrade_zlib_and_level(meta, data) - return meta, data - - def upgrade_archive_metadata(self, *, metadata): - new_metadata = {} - # keep all metadata except archive version and stats. also do not keep - # recreate_source_id, recreate_args, recreate_partial_chunks which were used only in 1.1.0b1 .. b2. - for attr in ("hostname", "username", "comment", "chunker_params"): - if hasattr(metadata, attr): - new_metadata[attr] = getattr(metadata, attr) - # if cwd is None, we want to drop it from metadata, so we set it to None here, and save() will drop it. - new_metadata["cwd"] = getattr(metadata, "cwd", None) - rechunking = self.args.chunker_params is not None - if rechunking: - # if we are rechunking while transferring, we take the new chunker_params. - new_metadata["chunker_params"] = self.args.chunker_params - else: - if chunker_params := new_metadata.get("chunker_params"): - if len(chunker_params) == 4 and isinstance(chunker_params[0], int): - # this is a borg < 1.2 chunker_params tuple, no chunker algo specified, but we only had buzhash: - new_metadata["chunker_params"] = (CH_BUZHASH,) + chunker_params - # old borg used UTC timestamps, but did not have the explicit tz offset in them. - # the only important timestamp is "time", which has the nominal timestamp of the archive. - if hasattr(metadata, "time"): - new_metadata["time"] = getattr(metadata, "time") + "+00:00" - # borg 1: cmdline, recreate_cmdline: a copy of sys.argv - # borg 2: command_line, recreate_command_line: a single string - if hasattr(metadata, "cmdline"): - new_metadata["command_line"] = join_cmd(getattr(metadata, "cmdline")) - if hasattr(metadata, "recreate_cmdline"): - new_metadata["recreate_command_line"] = join_cmd(getattr(metadata, "recreate_cmdline")) - new_metadata["tags"] = [] - return new_metadata