From 05734999ad96b7f704849bb471ff544747185f94 Mon Sep 17 00:00:00 2001 From: David Cruz Date: Sun, 12 Apr 2026 05:13:48 -0700 Subject: [PATCH 1/3] refactor: use native archive library powered by deflate-sys --- packages/archive/lde.json | 5 +- packages/archive/src/init.lua | 325 +++++++++++++++++------- packages/archive/tests/archive.test.lua | 102 ++++---- 3 files changed, 286 insertions(+), 146 deletions(-) diff --git a/packages/archive/lde.json b/packages/archive/lde.json index 416cfb5..bb81485 100644 --- a/packages/archive/lde.json +++ b/packages/archive/lde.json @@ -3,13 +3,12 @@ "description": "Archive extraction library. Reads magic bytes to detect zip vs tar.", "version": "0.1.0", "dependencies": { - "process2": { "path": "../process2" }, "fs": { "path": "../fs" }, - "path": { "path": "../path" } + "path": { "path": "../path" }, + "deflate-sys": { "git": "https://github.com/lde-org/deflate-sys" } }, "devDependencies": { "lde-test": { "path": "../lde-test" }, - "archive": { "path": "../archive" }, "fs": { "path": "../fs" }, "env": { "path": "../env" }, "path": { "path": "../path" } diff --git a/packages/archive/src/init.lua b/packages/archive/src/init.lua index 1ab2ec6..dc37696 100644 --- a/packages/archive/src/init.lua +++ b/packages/archive/src/init.lua @@ -1,24 +1,228 @@ -local process = require("process2") -local fs = require("fs") -local path = require("path") - -local ZIP_MAGIC = "\80\75\3\4" -- PK\x03\x04 - ----@param filePath string ----@return boolean -local function isZip(filePath) - local f = io.open(filePath, "rb") - if not f then return false end - local magic = f:read(4) - f:close() - return magic == ZIP_MAGIC +---@diagnostic disable: assign-type-mismatch + +local ffi = require("ffi") +local buf = require("string.buffer") +local deflate = require("deflate-sys") +local fs = require("fs") +local path = require("path") + +ffi.cdef [[ + typedef struct __attribute__((packed)) { + uint32_t sig; uint16_t ver, flags, method, mtime, mdate; + uint32_t crc, compSize, uncompSize; + uint16_t nameLen, extraLen; + } ZipLocal; + + typedef struct __attribute__((packed)) { + uint32_t sig; uint16_t verMade, verNeed, flags, method, mtime, mdate; + uint32_t crc, compSize, uncompSize; + uint16_t nameLen, extraLen, commentLen, disk, iattr; + uint32_t eattr, offset; + } ZipCD; + + typedef struct __attribute__((packed)) { + uint32_t sig; uint16_t disk, diskCd, count, total; + uint32_t cdSize, cdOffset; + uint16_t commentLen; + } ZipEOCD; + + typedef struct __attribute__((packed)) { + char name[100], mode[8], uid[8], gid[8], size[12], mtime[12], + checksum[8], typeflag, linkname[100], magic[6], version[2], + uname[32], gname[32], devmajor[8], devminor[8], prefix[155], pad[12]; + } TarHeader; +]] + +---@class ZipLocal: ffi.cdata* +---@field sig number +---@field ver number +---@field flags number +---@field method number +---@field crc number +---@field compSize number +---@field uncompSize number +---@field nameLen number +---@field extraLen number + +---@class ZipCD: ffi.cdata* +---@field sig number +---@field crc number +---@field compSize number +---@field uncompSize number +---@field nameLen number +---@field extraLen number +---@field commentLen number +---@field method number +---@field offset number + +---@class ZipEOCD: ffi.cdata* +---@field sig number +---@field count number +---@field total number +---@field cdSize number +---@field cdOffset number + +---@class TarHeader: ffi.cdata* +---@field name string +---@field mode string +---@field size string +---@field mtime string +---@field checksum string +---@field typeflag number +---@field magic string +---@field version string + +---@type fun(...): ZipLocal +local ZipLocalT = ffi.typeof("ZipLocal") +---@type fun(...): ZipCD +local ZipCDT = ffi.typeof("ZipCD") +---@type fun(...): ZipEOCD +local ZipEOCDT = ffi.typeof("ZipEOCD") +---@type fun(): TarHeader +local TarHeaderT = ffi.typeof("TarHeader") + +local tarHeaderSize = ffi.sizeof("TarHeader") + +---@param base string +---@param name string +---@param content string +local function writeFile(base, name, content) + local out = path.join(base, name) + local dir = path.dirname(out) + if dir then fs.mkdir(dir) end + fs.write(out, content) end +-- ── ZIP extract ─────────────────────────────────────────────────────────────── + +---@param data string +---@param toPath string +---@param strip boolean +local function zipExtract(data, toPath, strip) + local dptr = ffi.cast("const uint8_t *", data) + local eocdOff = #data - 22 + while eocdOff >= 0 and ffi.cast("ZipEOCD *", dptr + eocdOff).sig ~= 0x06054b50 do + eocdOff = eocdOff - 1 + end + assert(eocdOff >= 0, "ZIP: EOCD not found") + ---@type ZipEOCD + local eocd = ffi.cast("ZipEOCD *", dptr + eocdOff) + local cd = ffi.cast("const uint8_t *", dptr + eocd.cdOffset) + + for _ = 1, eocd.total do + ---@type ZipCD + local e = ffi.cast("ZipCD *", cd) + assert(e.sig == 0x02014b50, "ZIP: bad CD entry") + local name = ffi.string(cd + ffi.sizeof("ZipCD"), e.nameLen) + if strip then name = name:match("^[^/]*/(.+)") or name end + if name:sub(-1) ~= "/" then + ---@type ZipLocal + local lh = ffi.cast("ZipLocal *", dptr + e.offset) + local raw = ffi.string(dptr + e.offset + ffi.sizeof("ZipLocal") + lh.nameLen + lh.extraLen, e.compSize) + local content = e.method == 0 and raw or deflate.deflateDecompress(raw, e.uncompSize) + writeFile(toPath, name, content) + else + fs.mkdir(path.join(toPath, name)) + end + cd = cd + ffi.sizeof("ZipCD") + e.nameLen + e.extraLen + e.commentLen + end +end + +-- ── ZIP save ────────────────────────────────────────────────────────────────── + +---@param files table +---@param toPath string +local function zipSave(files, toPath) + local out = buf.new() + local cdBuf = buf.new() + local offset, count = 0, 0 + + for name, content in pairs(files) do + local comp = deflate.deflateCompress(content, 6) + local crc = deflate.crc32(content) + + local lh = ZipLocalT(0x04034b50, 20, 0, 8, 0, 0, crc, #comp, #content, #name, 0) + out:putcdata(lh, ffi.sizeof(lh)); out:put(name, comp) + + local cd = ZipCDT(0x02014b50, 20, 20, 0, 8, 0, 0, crc, #comp, #content, #name, 0, 0, 0, 0, 0, offset) + cdBuf:putcdata(cd, ffi.sizeof(cd)); cdBuf:put(name) + + offset = offset + ffi.sizeof(lh) + #name + #comp + count = count + 1 + end + + local cdStr = cdBuf:tostring() + local eocd = ZipEOCDT(0x06054b50, 0, 0, count, count, #cdStr, offset, 0) + out:put(cdStr); out:putcdata(eocd, ffi.sizeof(eocd)) + return fs.write(toPath, out:tostring()) +end + +-- ── TAR extract ─────────────────────────────────────────────────────────────── + +---@param data string +---@param toPath string +---@param strip boolean +local function tarExtract(data, toPath, strip) + local dptr = ffi.cast("const uint8_t *", data) + local pos = 0 + while pos + tarHeaderSize <= #data do + ---@type TarHeader + local h = ffi.cast("TarHeader *", dptr + pos) + if h.name[0] == 0 then break end + local name = ffi.string(h.name) + local size = tonumber(ffi.string(h.size, 11), 8) or 0 + pos = pos + tarHeaderSize + if strip then name = name:match("^[^/]*/(.+)") or name end + if h.typeflag == string.byte("5") or name:sub(-1) == "/" then + fs.mkdir(path.join(toPath, name)) + elseif h.typeflag == string.byte("0") or h.typeflag == 0 then + writeFile(toPath, name, ffi.string(dptr + pos, size)) + end + pos = pos + math.ceil(size / 512) * 512 + end +end + +-- ── TAR save ───────────────────────────────────────────────────────────────── + +---@param files table +---@param toPath string +local function tarSave(files, toPath) + local out = buf.new() + for name, content in pairs(files) do + ---@type TarHeader + local h = TarHeaderT() + ffi.copy(h.name, name, math.min(#name, 100)) + ffi.copy(h.mode, "0000644\0", 8) + ffi.copy(h.size, string.format("%011o", #content), 11) + ffi.copy(h.mtime, "00000000000", 11) + ffi.copy(h.magic, "ustar", 5) + ffi.copy(h.version, "00", 2) + h.typeflag = string.byte("0") + local sum = 8 * 32 + local hp = ffi.cast("const uint8_t *", h) + for i = 0, tarHeaderSize - 1 do sum = sum + hp[i] end + ffi.copy(h.checksum, string.format("%06o\0 ", sum), 8) + out:putcdata(h, tarHeaderSize) + out:put(content) + local pad = (512 - (#content % 512)) % 512 + if pad > 0 then out:put(string.rep("\0", pad)) end + end + out:put(string.rep("\0", 1024)) + local tarData = out:tostring() + local final = toPath:match("%.tar%.gz$") and deflate.gzipCompress(tarData) or tarData + return fs.write(toPath, final) +end + +-- ── Archive ─────────────────────────────────────────────────────────────────── + ---@class Archive ---@field _source string | table local Archive = {} Archive.__index = Archive +---@class Archive.ExtractOptions +---@field stripComponents boolean? + --- Create a new Archive. --- Pass a file path string to decode, or a table of `{ [path] = content }` to encode. ---@param source string | table @@ -27,97 +231,48 @@ function Archive.new(source) return setmetatable({ _source = source }, Archive) end ----@class Archive.ExtractOptions ----@field stripComponents boolean? # Strip the single top-level directory when extracting (default: false) - --- Extract the archive to the given output directory. ---- Only valid when the Archive was created with a file path. ---@param toPath string ----@param opts Archive.ExtractOptions? +---@param opts Archive.ExtractOptions? ---@return boolean ok ---@return string? err function Archive:extract(toPath, opts) local src = self._source - if type(src) ~= "string" then - return false, "extract() is only valid for file-backed archives" - end - + if type(src) ~= "string" then return false, "extract() is only valid for file-backed archives" end + local f = io.open(src, "rb") + if not f then return false, "cannot open: " .. src end + local data = f:read("*a"); f:close() local strip = opts and opts.stripComponents or false - local code, _, stderr - - if jit.os == "Linux" and isZip(src) then - if strip then - local tmpDir = toPath .. ".tmp" - code, _, stderr = process.exec("unzip", { "-q", src, "-d", tmpDir }) - if code == 0 then - local iter = fs.readdir(tmpDir) - local first = iter and iter() - local inner = (first and first.type == "dir") and path.join(tmpDir, first.name) or tmpDir - fs.move(inner, toPath) - fs.rmdir(tmpDir) - end + fs.mkdir(toPath) + local ok, err = pcall(function() + if ffi.cast("const uint32_t *", data)[0] == 0x04034b50 then + zipExtract(data, toPath, strip) else - code, _, stderr = process.exec("unzip", { "-q", src, "-d", toPath }) + local raw = data:sub(1, 2) == "\31\139" and deflate.gzipDecompress(data, math.max(#data * 10, 1024 * 1024)) or data + tarExtract(raw, toPath, strip) end - else - local args = { "-xf", src, "-C", toPath } - if strip then args[#args + 1] = "--strip-components=1" end - code, _, stderr = process.exec("tar", args) - end - - if code ~= 0 then - return false, stderr - end - + end) + if not ok then return false, err end return true end --- Save the in-memory file table to an archive. ---- Infers format from extension: `.zip` or `.tar.gz`. ---- Only valid when the Archive was created with a table. +--- Infers format from extension: `.zip`, `.tar`, or `.tar.gz`. ---@param toPath string ---@return boolean ok ---@return string? err function Archive:save(toPath) local src = self._source - if type(src) ~= "table" then - return false, "save() is only valid for table-backed archives" - end - - local isZipOut = toPath:match("%.zip$") ~= nil - local isTarGz = toPath:match("%.tar%.gz$") ~= nil - if not isZipOut and not isTarGz then + if type(src) ~= "table" then return false, "save() is only valid for table-backed archives" end + local isZip = toPath:match("%.zip$") + local isTar = toPath:match("%.tar") + if not isZip and not isTar then return false, "cannot determine archive format from path (expected .zip or .tar.gz)" end - - local tmpDir = toPath .. ".tmp" - fs.mkdir(tmpDir) - - for name, content in pairs(src) do - local filePath = path.join(tmpDir, name) - local dir = path.dirname(filePath) - if dir then fs.mkdir(dir) end - if not fs.write(filePath, content) then - fs.rmdir(tmpDir) - return false, "failed to write temp file: " .. filePath - end - end - - local code, _, stderr - if isZipOut and jit.os ~= "Windows" then - code, _, stderr = process.exec("zip", { "-qr", toPath, "." }, { cwd = tmpDir }) - elseif isZipOut then - code, _, stderr = process.exec("tar", { "-cf", toPath, "-C", tmpDir, "." }) - else - code, _, stderr = process.exec("tar", { "-czf", toPath, "-C", tmpDir, "." }) - end - - fs.rmdir(tmpDir) - - if code ~= 0 then - return false, stderr - end - + local ok, err = pcall(function() + if isZip then zipSave(src, toPath) else tarSave(src, toPath) end + end) + if not ok then return false, err end return true end diff --git a/packages/archive/tests/archive.test.lua b/packages/archive/tests/archive.test.lua index 6bb9b1f..80985b8 100644 --- a/packages/archive/tests/archive.test.lua +++ b/packages/archive/tests/archive.test.lua @@ -12,23 +12,6 @@ local function tmp(name) return path.join(tmpBase, name) end --- helpers to create real archives for testing -local function makeZip(zipPath, content) - local dir = tmp("zip-src") - fs.mkdir(dir) - fs.write(path.join(dir, "hello.txt"), content) - local code = os.execute("cd " .. dir .. " && zip -q " .. zipPath .. " hello.txt") - return code == 0 or code == true -end - -local function makeTar(tarPath, content) - local dir = tmp("tar-src") - fs.mkdir(dir) - fs.write(path.join(dir, "hello.txt"), content) - local code = os.execute("cd " .. dir .. " && tar -cf " .. tarPath .. " -C " .. dir .. " hello.txt") - return code == 0 or code == true -end - -- -- Archive.new -- @@ -96,55 +79,58 @@ test.it("save encodes to .tar.gz and files are extractable", function() test.equal(fs.read(path.join(outDir, "hello.txt")), "tar content") end) --- --- tar extraction --- - test.it("extracts a .tar archive", function() local tarPath = tmp("test.tar") local outDir = tmp("out-tar") fs.mkdir(outDir) - local made = makeTar(tarPath, "tar content") - if not made then return end -- skip if tar not available + local a = Archive.new({ ["hello.txt"] = "tar content" }) + local ok = a:save(tarPath) + test.truthy(ok) - local a = Archive.new(tarPath) - local ok = a:extract(outDir) + local b = Archive.new(tarPath) + local ok2 = b:extract(outDir) + test.truthy(ok2) + test.truthy(fs.exists(path.join(outDir, "hello.txt"))) +end) + +test.it("extracts a .zip archive", function() + local zipPath = tmp("test2.zip") + local outDir = tmp("out-zip2") + fs.mkdir(outDir) + + local a = Archive.new({ ["hello.txt"] = "zip content" }) + local ok = a:save(zipPath) test.truthy(ok) + + local b = Archive.new(zipPath) + local ok2 = b:extract(outDir) + test.truthy(ok2) test.truthy(fs.exists(path.join(outDir, "hello.txt"))) end) --- --- zip extraction (linux only — mac/windows always use tar) --- +test.it("stripComponents strips top-level dir from zip", function() + local zipPath = tmp("strip.zip") + local outDir = tmp("out-strip-zip") + fs.mkdir(outDir) -if jit.os == "Linux" then - test.it("extracts a .zip archive using unzip on linux", function() - local zipPath = tmp("test.zip") - local outDir = tmp("out-zip") - fs.mkdir(outDir) - - local made = makeZip(zipPath, "zip content") - if not made then return end -- skip if zip not available - - local a = Archive.new(zipPath) - local ok = a:extract(outDir) - test.truthy(ok) - test.truthy(fs.exists(path.join(outDir, "hello.txt"))) - end) - - test.it("uses tar for non-zip on linux even without .tar extension", function() - -- a .tar renamed to .bin — magic bytes are not PK, so tar is used - local tarPath = tmp("test.tar") - local binPath = tmp("test.bin") - makeTar(tarPath, "bin content") - fs.copy(tarPath, binPath) - - local outDir = tmp("out-bin") - fs.mkdir(outDir) - - local a = Archive.new(binPath) - local ok = a:extract(outDir) - test.truthy(ok) - end) -end + local a = Archive.new({ ["topdir/hello.txt"] = "stripped" }) + a:save(zipPath) + + local b = Archive.new(zipPath) + b:extract(outDir, { stripComponents = true }) + test.equal(fs.read(path.join(outDir, "hello.txt")), "stripped") +end) + +test.it("stripComponents strips top-level dir from tar.gz", function() + local tarPath = tmp("strip.tar.gz") + local outDir = tmp("out-strip-tar") + fs.mkdir(outDir) + + local a = Archive.new({ ["topdir/hello.txt"] = "stripped" }) + a:save(tarPath) + + local b = Archive.new(tarPath) + b:extract(outDir, { stripComponents = true }) + test.equal(fs.read(path.join(outDir, "hello.txt")), "stripped") +end) From e767afbf35a71702f4353a47f956dadbf608b949 Mon Sep 17 00:00:00 2001 From: David Cruz Date: Sun, 12 Apr 2026 05:17:19 -0700 Subject: [PATCH 2/3] build: cache deflate-sys build --- .github/workflows/build.yml | 6 ++++++ .github/workflows/nightly.yml | 6 ++++++ .github/workflows/release.yml | 6 ++++++ .github/workflows/test.yml | 6 ++++++ 4 files changed, 24 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2839f48..a6bd172 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -120,6 +120,12 @@ jobs: path: packages/*/target/curl-sys key: curl-sys-${{ matrix.os }}-${{ runner.arch }}${{ matrix.android && '-android' || '' }} + - name: Cache deflate-sys build + uses: actions/cache@v5 + with: + path: packages/*/target/deflate-sys + key: deflate-sys-${{ matrix.os }}-${{ runner.arch }}${{ matrix.android && '-android' || '' }} + - name: Build lde if: "!matrix.android" run: | diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 0e7d4fa..4111605 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -128,6 +128,12 @@ jobs: path: packages/*/target/curl-sys key: curl-sys-${{ matrix.os }}-${{ runner.arch }}${{ matrix.android && '-android' || '' }} + - name: Cache deflate-sys build + uses: actions/cache@v5 + with: + path: packages/*/target/deflate-sys + key: deflate-sys-${{ matrix.os }}-${{ runner.arch }}${{ matrix.android && '-android' || '' }} + - name: Build lde if: "!matrix.android" run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a4624ee..c06fde9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -124,6 +124,12 @@ jobs: path: packages/*/target/curl-sys key: curl-sys-${{ matrix.os }}-${{ runner.arch }}${{ matrix.android && '-android' || '' }} + - name: Cache deflate-sys build + uses: actions/cache@v5 + with: + path: packages/*/target/deflate-sys + key: deflate-sys-${{ matrix.os }}-${{ runner.arch }}${{ matrix.android && '-android' || '' }} + - name: Build lde if: "!matrix.android" run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d330b77..36eb27d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -118,6 +118,12 @@ jobs: path: packages/*/target/curl-sys key: curl-sys-${{ matrix.os }}-${{ runner.arch }}${{ matrix.android && '-android' || '' }} + - name: Cache deflate-sys build + uses: actions/cache@v5 + with: + path: packages/*/target/deflate-sys + key: deflate-sys-${{ matrix.os }}-${{ runner.arch }}${{ matrix.android && '-android' || '' }} + - name: Build lde # lde's own tests invoke the lde binary directly (e.g. to test CLI flags), # so we must compile a fresh binary from source before running tests. From 22dfcf8e9e2b822f6bda74777b8ac2982d5a6a34 Mon Sep 17 00:00:00 2001 From: David Cruz Date: Sun, 12 Apr 2026 05:30:51 -0700 Subject: [PATCH 3/3] fix(archive): handle deeply nested zips --- packages/archive/src/init.lua | 10 ++++++++-- packages/archive/tests/archive.test.lua | 17 +++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/packages/archive/src/init.lua b/packages/archive/src/init.lua index dc37696..1bfcd01 100644 --- a/packages/archive/src/init.lua +++ b/packages/archive/src/init.lua @@ -83,13 +83,19 @@ local TarHeaderT = ffi.typeof("TarHeader") local tarHeaderSize = ffi.sizeof("TarHeader") +---@param dir string +local function mkdirp(dir) + if fs.isdir(dir) then return end + mkdirp(path.dirname(dir)) + fs.mkdir(dir) +end + ---@param base string ---@param name string ---@param content string local function writeFile(base, name, content) local out = path.join(base, name) - local dir = path.dirname(out) - if dir then fs.mkdir(dir) end + mkdirp(path.dirname(out)) fs.write(out, content) end diff --git a/packages/archive/tests/archive.test.lua b/packages/archive/tests/archive.test.lua index 80985b8..e7ac8ef 100644 --- a/packages/archive/tests/archive.test.lua +++ b/packages/archive/tests/archive.test.lua @@ -122,6 +122,23 @@ test.it("stripComponents strips top-level dir from zip", function() test.equal(fs.read(path.join(outDir, "hello.txt")), "stripped") end) +-- regression: zips with no explicit directory entries (e.g. .src.rock files) +-- must still extract deeply nested files by creating parent dirs recursively +test.it("extracts zip with deeply nested files and no explicit dir entries", function() + local zipPath = tmp("nested.zip") + local outDir = tmp("out-nested") + fs.mkdir(outDir) + + -- save creates file entries only, no dir entries — matches .src.rock behavior + local a = Archive.new({ ["a/b/c/deep.lua"] = "deep content" }) + a:save(zipPath) + + local b = Archive.new(zipPath) + local ok = b:extract(outDir) + test.truthy(ok) + test.equal(fs.read(path.join(outDir, "a/b/c/deep.lua")), "deep content") +end) + test.it("stripComponents strips top-level dir from tar.gz", function() local tarPath = tmp("strip.tar.gz") local outDir = tmp("out-strip-tar")