From 006c896e5c80ae2c6750db72da38c719c35217b1 Mon Sep 17 00:00:00 2001 From: Ge Yao Date: Sun, 24 May 2026 17:20:32 +0800 Subject: [PATCH 1/2] Add seafile-download.sh to mirror Seafile client downloads Mirror Seafile desktop client downloads from www.seafile.com/download. The official download page links directly to Aliyun OSS (seafile-downloads.oss-cn-shanghai.aliyuncs.com). This script: 1. fetches the page with wget, 2. parses out OSS download URLs with Python's html.parser, 3. atomically downloads new/changed files via wget, 4. deletes stale local files bounded by TUNASYNC_MAX_DELETE. wget is used throughout because, in the tunasync Docker bridge network on at least one site, curl fails to reach Seafile's AWS-hosted OSS IPs while wget succeeds. --- seafile-download.sh | 122 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100755 seafile-download.sh diff --git a/seafile-download.sh b/seafile-download.sh new file mode 100755 index 0000000..cf708c0 --- /dev/null +++ b/seafile-download.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# Mirror Seafile client downloads from seafile.com/download. +# +# The official download page embeds direct OSS download links. +# This script fetches the page with wget, parses out OSS URLs with Python, +# downloads new/changed files atomically via wget, and removes stale local +# files (bounded by TUNASYNC_MAX_DELETE). +# +# wget is used (not curl or urllib) because the tunasync Docker bridge +# network can reach the Seafile AWS origin only via wget. +set -euo pipefail + +WORKDIR="${TUNASYNC_WORKING_DIR:-}" +UPSTREAM="${TUNASYNC_UPSTREAM_URL:-https://www.seafile.com/download/}" +MAX_DELETE="${TUNASYNC_MAX_DELETE:-50}" + +if [ -z "$WORKDIR" ]; then + echo "ERROR: TUNASYNC_WORKING_DIR not set" + exit 2 +fi + +mkdir -p "$WORKDIR" +cd "$WORKDIR" + +echo "Fetching download page via wget..." 
+wget -qO /tmp/seafile-page.html --timeout=30 --tries=3 "$UPSTREAM" || { + echo "ERROR: wget failed to fetch $UPSTREAM" + exit 1 +} + +python3 - "$WORKDIR" "$MAX_DELETE" "$UPSTREAM" < MAX_DELETE: + print(f"WARNING: {len(stale)} stale files exceeds MAX_DELETE ({MAX_DELETE})", file=sys.stderr) + sys.exit(1) +for f in stale: + fp = os.path.join(WORKDIR, f) + print(f"Deleting stale: {f}", file=sys.stderr) + os.remove(fp) + +# Download each file atomically via wget (curl fails to Seafile AWS IPs +# from the Docker bridge network; wget works). +for url in oss_urls: + name = url.rstrip("/").rsplit("/", 1)[-1] + name = urllib.parse.unquote(name) + target = os.path.join(WORKDIR, name) + tmp = target + ".tmp" + + # Check remote size with wget --spider + r = subprocess.run( + ["wget", "--spider", "--timeout=30", "--tries=1", "-S", url], + capture_output=True, text=True) + remote_size = 0 + for line in r.stderr.split("\n"): + if "Content-Length:" in line: + remote_size = int(line.split(":")[1].strip()) + break + if r.returncode != 0: + print(f"ERROR: spider {url}: {r.stderr[-200:]}", file=sys.stderr) + sys.exit(1) + + if os.path.exists(target) and os.path.getsize(target) == remote_size: + continue + + print(f"Downloading: {name} ({remote_size} bytes)", file=sys.stderr) + r = subprocess.run( + ["wget", "-q", "--timeout=30", "--tries=3", "-O", tmp, url], + capture_output=True, text=True) + if r.returncode != 0: + print(f"ERROR: download {url}: {r.stderr[-200:]}", file=sys.stderr) + if os.path.exists(tmp): + os.remove(tmp) + sys.exit(1) + + os.replace(tmp, target) + +print("Done.", file=sys.stderr) +PY From 939ae1dd6c3b16a0229dddf693a64c9b3424cd87 Mon Sep 17 00:00:00 2001 From: Ge Yao Date: Sun, 24 May 2026 17:45:57 +0800 Subject: [PATCH 2/2] seafile-download.sh: defer cleanup, harden filename, mktemp page Address review feedback: - Move stale-file deletion after all downloads succeed so a transient network or upstream error cannot wipe the mirror. 
- Sanitize the OSS URL basename via urllib.parse.unquote and reject filenames that contain path separators, NUL, '..' or are empty, preventing path traversal even if the upstream HTML is hostile. - Replace the fixed /tmp/seafile-page.html path with mktemp + trap cleanup to avoid symlink/clobber races on shared hosts. - When Content-Length is missing (chunked transfer / some CDNs), fall back to downloading and verify the temp file size before promoting it, instead of treating size 0 as a match. - Confirmed shebang is at byte 0. --- seafile-download.sh | 111 +++++++++++++++++++++++++++++++------------- 1 file changed, 79 insertions(+), 32 deletions(-) diff --git a/seafile-download.sh b/seafile-download.sh index cf708c0..4bd2bbc 100755 --- a/seafile-download.sh +++ b/seafile-download.sh @@ -22,22 +22,26 @@ fi mkdir -p "$WORKDIR" cd "$WORKDIR" +PAGE=$(mktemp -t seafile-page.XXXXXX.html) +trap 'rm -f "$PAGE"' EXIT + echo "Fetching download page via wget..." -wget -qO /tmp/seafile-page.html --timeout=30 --tries=3 "$UPSTREAM" || { +wget -qO "$PAGE" --timeout=30 --tries=3 "$UPSTREAM" || { echo "ERROR: wget failed to fetch $UPSTREAM" exit 1 } -python3 - "$WORKDIR" "$MAX_DELETE" "$UPSTREAM" < MAX_DELETE: - print(f"WARNING: {len(stale)} stale files exceeds MAX_DELETE ({MAX_DELETE})", file=sys.stderr) - sys.exit(1) -for f in stale: - fp = os.path.join(WORKDIR, f) - print(f"Deleting stale: {f}", file=sys.stderr) - os.remove(fp) - -# Download each file atomically via wget (curl fails to Seafile AWS IPs -# from the Docker bridge network; wget works). -for url in oss_urls: - name = url.rstrip("/").rsplit("/", 1)[-1] - name = urllib.parse.unquote(name) - target = os.path.join(WORKDIR, name) - tmp = target + ".tmp" - # Check remote size with wget --spider +def remote_size_via_spider(url): + """Get Content-Length using wget --spider. 
Returns (size, stderr); size is None if the spider fails or no Content-Length parses.""" r = subprocess.run( ["wget", "--spider", "--timeout=30", "--tries=1", "-S", url], capture_output=True, text=True) - remote_size = 0 + if r.returncode != 0: + return None, r.stderr + size = None for line in r.stderr.split("\n"): if "Content-Length:" in line: - remote_size = int(line.split(":")[1].strip()) - break - if r.returncode != 0: - print(f"ERROR: spider {url}: {r.stderr[-200:]}", file=sys.stderr) - sys.exit(1) + try: + size = int(line.split(":", 1)[1].strip()) + except ValueError: + pass + return size, r.stderr + + +# Download each file atomically via wget (curl fails to Seafile AWS IPs +# from the Docker bridge network; wget works). +new_files = [] +for url in oss_urls: + name = url_to_name[url] + target = os.path.join(WORKDIR, name) + tmp = target + ".tmp" - if os.path.exists(target) and os.path.getsize(target) == remote_size: + remote_size, stderr = remote_size_via_spider(url) + if remote_size is None: + # Some CDNs strip Content-Length on chunked or 302 responses; we still + # need to know whether the URL itself is reachable. + if stderr and "200 OK" not in stderr and "remote file exists" not in stderr.lower(): + print(f"ERROR: spider {url}: {stderr[-200:]}", file=sys.stderr) + sys.exit(1) + # Fall through; we'll download and trust wget to validate. 
+ + if (remote_size is not None + and os.path.exists(target) + and os.path.getsize(target) == remote_size): continue - print(f"Downloading: {name} ({remote_size} bytes)", file=sys.stderr) + print(f"Downloading: {name} ({remote_size} bytes)" if remote_size is not None + else f"Downloading: {name}", file=sys.stderr) r = subprocess.run( ["wget", "-q", "--timeout=30", "--tries=3", "-O", tmp, url], capture_output=True, text=True) @@ -116,7 +140,30 @@ for url in oss_urls: os.remove(tmp) sys.exit(1) + if remote_size is not None: + downloaded = os.path.getsize(tmp) + if downloaded != remote_size: + print(f"ERROR: short read for {name}: got {downloaded}, " + f"expected {remote_size}", file=sys.stderr) + os.remove(tmp) + sys.exit(1) + os.replace(tmp, target) + new_files.append(name) + +# Only delete stale files after all downloads succeeded so a transient +# upstream issue cannot wipe the mirror. +local_files = [f for f in os.listdir(WORKDIR) + if os.path.isfile(os.path.join(WORKDIR, f))] +stale = [f for f in local_files if f not in remote_names and not f.endswith(".tmp")] +if len(stale) > MAX_DELETE: + print(f"WARNING: {len(stale)} stale files exceeds MAX_DELETE ({MAX_DELETE})", + file=sys.stderr) + sys.exit(1) +for f in stale: + fp = os.path.join(WORKDIR, f) + print(f"Deleting stale: {f}", file=sys.stderr) + os.remove(fp) print("Done.", file=sys.stderr) PY