Skip to content

Commit

Permalink
Merge pull request #8472 from ThomasWaldmann/borgstore010
Browse files Browse the repository at this point in the history
changes needed for borgstore 0.1.0
  • Loading branch information
ThomasWaldmann authored Oct 15, 2024
2 parents dd7ce48 + 7be254e commit dfbd3b7
Show file tree
Hide file tree
Showing 13 changed files with 162 additions and 241 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ jobs:

windows:

if: true # build enabled
if: false # build temporarily disabled
runs-on: windows-latest
timeout-minutes: 120
needs: linux
Expand Down
14 changes: 7 additions & 7 deletions docs/usage/general/repository-urls.rst.inc
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,29 @@ Note: you may also prepend a ``file://`` to a filesystem path to get URL style.

**Remote repositories** accessed via ssh user@host:

``ssh://user@host:port/path/to/repo`` - absolute path
``ssh://user@host:port//abs/path/to/repo`` - absolute path

``ssh://user@host:port/./path/to/repo`` - path relative to current directory

``ssh://user@host:port/~/path/to/repo`` - path relative to user's home directory
``ssh://user@host:port/rel/path/to/repo`` - path relative to current directory

**Remote repositories** accessed via sftp:

``sftp://user@host:port/path/to/repo`` - absolute path
``sftp://user@host:port//abs/path/to/repo`` - absolute path

``sftp://user@host:port/rel/path/to/repo`` - path relative to current directory

For ssh and sftp URLs, the ``user@`` and ``:port`` parts are optional.

**Remote repositories** accessed via rclone:

``rclone://remote:path`` - see the rclone docs for more details.
``rclone:remote:path`` - see the rclone docs for more details about remote:path.


If you frequently need the same repo URL, it is a good idea to set the
``BORG_REPO`` environment variable to set a default for the repo URL:

::

export BORG_REPO='ssh://user@host:port/path/to/repo'
export BORG_REPO='ssh://user@host:port/rel/path/to/repo'

Then just omit the ``--repo`` option if you want
to use the default - it will then be read from ``BORG_REPO``.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ classifiers = [
]
license = {text="BSD"}
dependencies = [
"borgstore ~= 0.0.4",
"borgstore ~= 0.1.0",
"msgpack >=1.0.3, <=1.1.0",
"packaging",
"platformdirs >=3.0.0, <5.0.0; sys_platform == 'darwin'", # for macOS: breaking changes in 3.0.0,
Expand Down
18 changes: 9 additions & 9 deletions scripts/msys2-install-deps
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/bin/bash

pacman -S --needed --noconfirm git mingw-w64-ucrt-x86_64-{toolchain,pkgconf,zstd,lz4,xxhash,openssl,python-msgpack,python-argon2_cffi,python-platformdirs,python,cython,python-setuptools,python-wheel,python-build,python-pkgconfig,python-packaging,python-pip,python-paramiko}
python -m pip install --upgrade pip
pip install pyinstaller==6.3.0

if [ "$1" = "development" ]; then
pacman -S --needed --noconfirm mingw-w64-ucrt-x86_64-python-{pytest,pytest-benchmark,pytest-cov,pytest-forked,pytest-xdist}
fi
#!/bin/bash

pacman -S --needed --noconfirm git mingw-w64-ucrt-x86_64-{toolchain,pkgconf,zstd,lz4,xxhash,openssl,rclone,python-msgpack,python-argon2_cffi,python-platformdirs,python,cython,python-setuptools,python-wheel,python-build,python-pkgconfig,python-packaging,python-pip,python-paramiko}
python -m pip install --upgrade pip
pip install pyinstaller==6.10.0

if [ "$1" = "development" ]; then
pacman -S --needed --noconfirm mingw-w64-ucrt-x86_64-python-{pytest,pytest-benchmark,pytest-cov,pytest-forked,pytest-xdist}
fi
4 changes: 4 additions & 0 deletions src/borg/archiver/repo_create_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ def build_parser_repo_create(self, subparsers, common_parser, mid_common_parser)
This command creates a new, empty repository. A repository is a ``borgstore`` store
containing the deduplicated data from zero or more archives.
Repository creation can be quite slow for some kinds of stores (e.g. for ``sftp:``) -
this is due to borgstore pre-creating all directories needed, making usage of the
store faster.
Encryption mode TLDR
++++++++++++++++++++
Expand Down
2 changes: 1 addition & 1 deletion src/borg/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def archiver(tmp_path, set_env_variables):

@pytest.fixture()
def remote_archiver(archiver):
archiver.repository_location = "ssh://__testsuite__" + str(archiver.repository_path)
archiver.repository_location = "ssh://__testsuite__/" + str(archiver.repository_path)
yield archiver


Expand Down
159 changes: 43 additions & 116 deletions src/borg/helpers/parseformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,35 +401,15 @@ def parse_stringified_list(s):
class Location:
"""Object representing a repository location"""

# user@ (optional)
# user must not contain "@", ":" or "/".
# Quoting adduser error message:
# "To avoid problems, the username should consist only of letters, digits,
# underscores, periods, at signs and dashes, and not start with a dash
# (as defined by IEEE Std 1003.1-2001)."
# We use "@" as separator between username and hostname, so we must
# disallow it within the pure username part.
optional_user_re = r"""
(?:(?P<user>[^@:/]+)@)?
"""

# path must not contain :: (it ends at :: or string end), but may contain single colons.
# to avoid ambiguities with other regexes, it must also not start with ":" nor with "//" nor with "ssh://".
local_path_re = r"""
(?!(:|//|ssh://|socket://)) # not starting with ":" or // or ssh:// or socket://
(?P<path>([^:]|(:(?!:)))+) # any chars, but no "::"
"""

# file_path must not contain :: (it ends at :: or string end), but may contain single colons.
# it must start with a / and that slash is part of the path.
file_path_re = r"""
(?P<path>(([^/]*)/([^:]|(:(?!:)))+)) # start opt. servername, then /, then any chars, but no "::"
"""

# abs_path must not contain :: (it ends at :: or string end), but may contain single colons.
# it must start with a / and that slash is part of the path.
abs_path_re = r"""
(?P<path>(/([^:]|(:(?!:)))+)) # start with /, then any chars, but no "::"
"""
optional_user_re = r"(?:(?P<user>[^@:/]+)@)?"

# host NAME, or host IP ADDRESS (v4 or v6, v6 must be in square brackets)
host_re = r"""
Expand All @@ -440,69 +420,38 @@ class Location:
)
"""

# regexes for misc. kinds of supported location specifiers:
ssh_re = re.compile(
r"""
(?P<proto>ssh):// # ssh://
"""
+ optional_user_re
+ host_re
+ r""" # user@ (optional), host name or address
(?::(?P<port>\d+))? # :port (optional)
"""
+ abs_path_re,
re.VERBOSE,
) # path
# :port (optional)
optional_port_re = r"(?::(?P<port>\d+))?"

sftp_re = re.compile(
r"""
(?P<proto>sftp):// # sftp://
"""
+ optional_user_re
+ host_re
+ r""" # user@ (optional), host name or address
(?::(?P<port>\d+))? # :port (optional)
"""
+ abs_path_re,
re.VERBOSE,
) # path
# path may contain any chars. to avoid ambiguities with other regexes,
# it must not start with "//" nor with "scheme://" nor with "rclone:".
local_path_re = r"""
(?!(//|(ssh|socket|sftp|file)://|rclone:))
(?P<path>.+)
"""

rclone_re = re.compile(
r"""
(?P<proto>rclone):// # rclone://
(?P<path>(.*))
""",
re.VERBOSE,
) # path
# abs_path must start with a slash.
abs_path_re = r"(?P<path>/.+)"

socket_re = re.compile(
r"""
(?P<proto>socket):// # socket://
"""
+ abs_path_re,
re.VERBOSE,
) # path
# path may or may not start with a slash.
abs_or_rel_path_re = r"(?P<path>.+)"

file_re = re.compile(
r"""
(?P<proto>file):// # file://
"""
+ file_path_re,
# regexes for misc. kinds of supported location specifiers:
ssh_or_sftp_re = re.compile(
r"(?P<proto>(ssh|sftp))://"
+ optional_user_re
+ host_re
+ optional_port_re
+ r"/" # this is the separator, not part of the path!
+ abs_or_rel_path_re,
re.VERBOSE,
) # servername/path or path
)

local_re = re.compile(local_path_re, re.VERBOSE) # local path
rclone_re = re.compile(r"(?P<proto>rclone):(?P<path>(.*))", re.VERBOSE)

win_file_re = re.compile(
r"""
(?:file://)? # optional file protocol
(?P<path>
(?:[a-zA-Z]:)? # Drive letter followed by a colon (optional)
(?:[^:]+) # Anything which does not contain a :, at least one char
)
""",
re.VERBOSE,
)
file_or_socket_re = re.compile(r"(?P<proto>(file|socket))://" + abs_path_re, re.VERBOSE)

local_re = re.compile(local_path_re, re.VERBOSE)

def __init__(self, text="", overrides={}, other=False):
self.repo_env_var = "BORG_OTHER_REPO" if other else "BORG_REPO"
Expand Down Expand Up @@ -532,47 +481,28 @@ def parse(self, text, overrides={}):
raise ValueError('Invalid location format: "%s"' % self.processed)

def _parse(self, text):
def normpath_special(p):
# avoid that normpath strips away our relative path hack and even makes p absolute
relative = p.startswith("/./")
p = os.path.normpath(p)
return ("/." + p) if relative else p

m = self.ssh_re.match(text)
m = self.ssh_or_sftp_re.match(text)
if m:
self.proto = m.group("proto")
self.user = m.group("user")
self._host = m.group("host")
self.port = m.group("port") and int(m.group("port")) or None
self.path = normpath_special(m.group("path"))
return True
m = self.sftp_re.match(text)
if m:
self.proto = m.group("proto")
self.user = m.group("user")
self._host = m.group("host")
self.port = m.group("port") and int(m.group("port")) or None
self.path = normpath_special(m.group("path"))
self.path = os.path.normpath(m.group("path"))
return True
m = self.rclone_re.match(text)
if m:
self.proto = m.group("proto")
self.path = m.group("path")
return True
m = self.file_re.match(text)
if m:
self.proto = m.group("proto")
self.path = normpath_special(m.group("path"))
return True
m = self.socket_re.match(text)
m = self.file_or_socket_re.match(text)
if m:
self.proto = m.group("proto")
self.path = normpath_special(m.group("path"))
self.path = os.path.normpath(m.group("path"))
return True
m = self.local_re.match(text)
if m:
self.proto = "file"
self.path = normpath_special(m.group("path"))
self.path = os.path.abspath(os.path.normpath(m.group("path")))
return True
return False

Expand All @@ -587,7 +517,7 @@ def __str__(self):
return ", ".join(items)

def to_key_filename(self):
name = re.sub(r"[^\w]", "_", self.path).strip("_")
name = re.sub(r"[^\w]", "_", self.path.rstrip("/"))
if self.proto not in ("file", "socket", "rclone"):
name = re.sub(r"[^\w]", "_", self.host) + "__" + name
if len(name) > 100:
Expand All @@ -609,20 +539,17 @@ def host(self):
def canonical_path(self):
    """Return the canonical string form of this repository location.

    - ``file`` / ``socket``: the plain path (``self.path``).
    - ``rclone``: ``rclone:<path>``.
    - ``ssh`` / ``sftp``: ``proto://[user@]host[:port]/<path>``. The ``/``
      after host/port is only the separator and not part of the path, so an
      absolute path shows up as ``//abs/path``.

    Raises:
        NotImplementedError: for any other protocol.
    """
    if self.proto in ("file", "socket"):
        return self.path
    if self.proto == "rclone":
        return f"{self.proto}:{self.path}"
    if self.proto in ("sftp", "ssh"):
        user = f"{self.user}@" if self.user else ""
        # use the raw _host here (needed for ipv6 addrs)
        host = self._host if self._host else ""
        # the ":" separator is required: without it the port digits would be
        # glued onto the host name and the result would no longer parse back
        # to the same host/port.
        port = f":{self.port}" if self.port else ""
        return f"{self.proto}://{user}{host}{port}/{self.path}"
    raise NotImplementedError(self.proto)

def with_timestamp(self, timestamp):
# note: this only affects the repository URL/path, not the archive name!
Expand Down
8 changes: 2 additions & 6 deletions src/borg/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,12 +361,8 @@ def negotiate(self, client_data):
def _resolve_path(self, path):
if isinstance(path, bytes):
path = os.fsdecode(path)
if path.startswith("/~/"): # /~/x = path x relative to own home dir
home_dir = os.environ.get("HOME") or os.path.expanduser("~%s" % os.environ.get("USER", ""))
path = os.path.join(home_dir, path[3:])
elif path.startswith("/./"): # /./x = path x relative to cwd
path = path[3:]
return os.path.realpath(path)
path = os.path.realpath(path)
return path

def open(
self,
Expand Down
15 changes: 14 additions & 1 deletion src/borg/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from borgstore.store import ObjectNotFound as StoreObjectNotFound
from borgstore.backends.errors import BackendError as StoreBackendError
from borgstore.backends.errors import BackendDoesNotExist as StoreBackendDoesNotExist
from borgstore.backends.errors import BackendAlreadyExists as StoreBackendAlreadyExists

from .checksums import xxh64
from .constants import * # NOQA
Expand Down Expand Up @@ -117,6 +118,7 @@ def __init__(
url = "file://%s" % os.path.abspath(path_or_location)
location = Location(url)
self._location = location
self.url = url
# lots of stuff in data: use 2 levels by default (data/00/00/ .. data/ff/ff/ dirs)!
data_levels = int(os.environ.get("BORG_STORE_DATA_LEVELS", "2"))
levels_config = {
Expand Down Expand Up @@ -174,13 +176,24 @@ def id_str(self):

def create(self):
"""Create a new empty repository"""
self.store.create()
try:
self.store.create()
except StoreBackendAlreadyExists:
raise self.AlreadyExists(self.url)
self.store.open()
try:
self.store.store("config/readme", REPOSITORY_README.encode())
self.version = 3
self.store.store("config/version", str(self.version).encode())
self.store.store("config/id", bin_to_hex(os.urandom(32)).encode())
# we know repo/data/ still does not have any chunks stored in it,
# but for some stores, there might be a lot of empty directories and
# listing them all might be rather slow, so we better cache an empty
# ChunkIndex from here so that the first repo operation does not have
# to build the ChunkIndex the slow way by listing all the directories.
from borg.cache import write_chunkindex_to_repo_cache

write_chunkindex_to_repo_cache(self, ChunkIndex(), compact=True, clear=True, force_write=True)
finally:
self.store.close()

Expand Down
1 change: 0 additions & 1 deletion src/borg/testsuite/archiver/checks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,6 @@ def test_unknown_feature_on_mount(archivers, request):
mountpoint = os.path.join(archiver.tmpdir, "mountpoint")
os.mkdir(mountpoint)
# XXX this might hang if it doesn't raise an error
archiver.repository_location += "::test"
cmd_raises_unknown_feature(archiver, ["mount", mountpoint])


Expand Down
Loading

0 comments on commit dfbd3b7

Please sign in to comment.