|
| 1 | +import Foundation |
| 2 | +import Compression |
| 3 | + |
/// A utility for GZIP (RFC 1952) compression and decompression.
///
/// The DEFLATE payload is produced and consumed by `ZlibProxy`; this type only
/// adds and validates the GZIP container (header, CRC-32 and size trailer).
public struct Gzip {

    /// Errors that can occur during compression or decompression.
    public enum Error: Swift.Error {
        case compressionFailed
        case decompressionFailed
        case invalidData
    }

    /// Fixed 10-byte member header written by `compress(data:)`:
    /// magic (0x1f 0x8b), method 8 (DEFLATE), no flags, zero MTIME, zero XFL, OS = 3.
    private static let header: [UInt8] = [0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03]

    /// Size of the trailer: CRC-32 (4 bytes) followed by ISIZE (4 bytes).
    private static let footerSize = 8

    /// Compresses data into a single-member GZIP stream.
    /// - Parameter data: The input data to compress.
    /// - Returns: The compressed data (header, raw DEFLATE payload, CRC-32, ISIZE).
    /// - Throws: `Gzip.Error.compressionFailed` if the operation fails.
    public static func compress(data: Data) throws -> Data {
        var result = Data(header)

        // Raw DEFLATE payload (no zlib wrapper).
        result.append(try compressRawDeflate(data))

        // Trailer: CRC-32 of the uncompressed input, then its length modulo 2^32,
        // both stored little-endian.
        appendLittleEndian(Checksum.crc32(data: data), to: &result)
        appendLittleEndian(UInt32(truncatingIfNeeded: data.count), to: &result)

        return result
    }

    /// Decompresses GZIP compressed data.
    ///
    /// The trailer is assumed to be the last 8 bytes of `data`; after inflating,
    /// the CRC-32 and ISIZE stored there are checked against the output.
    ///
    /// - Parameter data: The compressed data (including GZIP header). May be a slice.
    /// - Returns: The uncompressed data.
    /// - Throws: `Gzip.Error.invalidData` if the container is malformed or the
    ///   trailer does not match the inflated output; `Gzip.Error.decompressionFailed`
    ///   if the method is not DEFLATE or the payload cannot be inflated.
    public static func decompress(data: Data) throws -> Data {
        // `Data` slices keep the indices of their parent, so rebase to a
        // zero-based copy before using absolute offsets below.
        let bytes = data.startIndex == 0 ? data : data.subdata(in: data.startIndex..<data.endIndex)

        // GZIP header validation: 10-byte header + 8-byte trailer is the bare minimum.
        guard bytes.count >= header.count + footerSize else { throw Error.invalidData }
        guard bytes[0] == 0x1f, bytes[1] == 0x8b else { throw Error.invalidData }
        guard bytes[2] == 0x08 else { throw Error.decompressionFailed } // Method must be DEFLATE

        var parser = DataParser(data: bytes)
        _ = try parser.readBytes(count: 3) // Magic (2), Method (1)
        let flags = try parser.readByte()
        _ = try parser.readBytes(count: 6) // MTime, XFlags, OS

        // FEXTRA (0x04): 2-byte length followed by that many bytes.
        if (flags & 0x04) != 0 {
            let xlen = try parser.readUInt16()
            _ = try parser.readBytes(count: Int(xlen))
        }

        // FNAME (0x08): zero-terminated string.
        if (flags & 0x08) != 0 {
            while (try parser.readByte()) != 0 {}
        }

        // FCOMMENT (0x10): zero-terminated string.
        if (flags & 0x10) != 0 {
            while (try parser.readByte()) != 0 {}
        }

        // FHCRC (0x02): 2-byte header checksum (skipped, not verified).
        if (flags & 0x02) != 0 {
            _ = try parser.readBytes(count: 2)
        }

        let headerSize = parser.offset
        let footerStart = bytes.count - footerSize
        guard footerStart > headerSize else { throw Error.invalidData }

        let inflated = try decompressRawDeflate(bytes.subdata(in: headerSize..<footerStart))

        // Verify the trailer so corrupted input is reported instead of returned.
        let expectedCRC = readLittleEndianUInt32(bytes, at: footerStart)
        let expectedSize = readLittleEndianUInt32(bytes, at: footerStart + 4)
        guard Checksum.crc32(data: inflated) == expectedCRC,
              UInt32(truncatingIfNeeded: inflated.count) == expectedSize else {
            throw Error.invalidData
        }

        return inflated
    }

    // MARK: - Internal Helpers using ZlibProxy

    private static func compressRawDeflate(_ input: Data) throws -> Data {
        return try ZlibProxy.compressRawDeflate(data: input)
    }

    private static func decompressRawDeflate(_ input: Data) throws -> Data {
        return try ZlibProxy.decompressRawDeflate(data: input)
    }

    // MARK: - Byte-order helpers

    /// Appends `value` to `output` as four little-endian bytes.
    private static func appendLittleEndian(_ value: UInt32, to output: inout Data) {
        output.append(contentsOf: withUnsafeBytes(of: value.littleEndian) { Array($0) })
    }

    /// Reads four little-endian bytes from a zero-based `bytes` starting at `offset`.
    /// The caller guarantees that `offset + 3` is within bounds.
    private static func readLittleEndianUInt32(_ bytes: Data, at offset: Int) -> UInt32 {
        let b0 = UInt32(bytes[offset])
        let b1 = UInt32(bytes[offset + 1])
        let b2 = UInt32(bytes[offset + 2])
        let b3 = UInt32(bytes[offset + 3])
        return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
    }
}
| 100 | + |
// MARK: - Checksum Utility

/// Checksum routines used by the GZIP container.
struct Checksum {

    /// Lookup table for the reflected CRC-32 polynomial 0xEDB88320.
    /// Built once on first use instead of on every `crc32(data:)` call.
    private static let table: [UInt32] = (0..<256).map { index -> UInt32 in
        var value = UInt32(index)
        for _ in 0..<8 {
            value = (value & 1) != 0 ? 0xEDB88320 ^ (value >> 1) : value >> 1
        }
        return value
    }

    /// Computes the CRC-32 of `data` (initial value 0xFFFFFFFF, final XOR 0xFFFFFFFF).
    /// - Parameter data: The bytes to checksum; may be empty or a slice.
    /// - Returns: The CRC-32 value; `0` for empty input.
    static func crc32(data: Data) -> UInt32 {
        let table = Checksum.table
        var crc: UInt32 = 0xFFFFFFFF

        for byte in data {
            let index = Int((crc ^ UInt32(byte)) & 0xFF)
            crc = table[index] ^ (crc >> 8)
        }

        return crc ^ 0xFFFFFFFF
    }
}
| 128 | + |
// MARK: - ZLib Proxy via @_silgen_name
// Bypasses the 'import zlib' requirement by binding directly to the system library's symbols.
// NOTE(review): `@_silgen_name` is an unofficial attribute and these declarations rely on the
// Swift and C calling conventions agreeing for plain pointer/integer arguments — confirm on
// every platform this ships on.
struct ZlibProxy {

    // Compression Bindings
    @_silgen_name("deflateInit2_")
    private static func deflateInit2_(_ strm: UnsafeMutableRawPointer, _ level: Int32, _ method: Int32, _ windowBits: Int32, _ memLevel: Int32, _ strategy: Int32, _ version: UnsafePointer<CChar>, _ stream_size: Int32) -> Int32

    @_silgen_name("deflate")
    private static func deflate(_ strm: UnsafeMutableRawPointer, _ flush: Int32) -> Int32

    @_silgen_name("deflateEnd")
    private static func deflateEnd(_ strm: UnsafeMutableRawPointer) -> Int32

    // Decompression Bindings
    @_silgen_name("inflateInit2_")
    private static func inflateInit2_(_ strm: UnsafeMutableRawPointer, _ windowBits: Int32, _ version: UnsafePointer<CChar>, _ stream_size: Int32) -> Int32

    @_silgen_name("inflate")
    private static func inflate(_ strm: UnsafeMutableRawPointer, _ flush: Int32) -> Int32

    @_silgen_name("inflateEnd")
    private static func inflateEnd(_ strm: UnsafeMutableRawPointer) -> Int32

    /// Hand-written mirror of zlib's `z_stream`.
    /// NOTE(review): the `UInt` fields stand in for C `unsigned long`, which only has the
    /// same width on LP64 targets — verify before building for other data models.
    private struct ZStream {
        var next_in: UnsafeMutableRawPointer? = nil
        var avail_in: UInt32 = 0
        var total_in: UInt = 0

        var next_out: UnsafeMutableRawPointer? = nil
        var avail_out: UInt32 = 0
        var total_out: UInt = 0

        var msg: UnsafePointer<CChar>? = nil
        var state: OpaquePointer? = nil

        var zalloc: OpaquePointer? = nil
        var zfree: OpaquePointer? = nil
        var opaque: OpaquePointer? = nil

        var data_type: Int32 = 0
        var adler: UInt = 0
        var reserved: UInt = 0
    }

    /// zlib constants used by this proxy.
    private enum Z {
        static let ok: Int32 = 0
        static let streamEnd: Int32 = 1
        static let noFlush: Int32 = 0
        static let finish: Int32 = 4
        static let deflated: Int32 = 8
        static let defaultCompression: Int32 = -1
        static let defaultStrategy: Int32 = 0
        static let defaultMemLevel: Int32 = 8
        /// Negative window bits select raw DEFLATE (no zlib header/trailer).
        static let rawWindowBits: Int32 = -15
    }

    /// Version string handed to the `*Init2_` entry points.
    private static let zlibVersion = "1.2.11"

    /// Size of the scratch buffer that receives zlib's output on each step.
    private static let outputChunkSize = 65536

    /// Compresses `data` into a raw DEFLATE stream.
    /// - Parameter data: The bytes to compress; may be empty.
    /// - Returns: The raw DEFLATE stream.
    /// - Throws: `Gzip.Error.compressionFailed` if zlib reports an error.
    static func compressRawDeflate(data: Data) throws -> Data {
        let streamSize = Int32(MemoryLayout<ZStream>.stride)
        let version = zlibVersion

        return try pump(
            input: data,
            finalFlush: Z.finish,
            failure: .compressionFailed,
            begin: { strm in
                version.withCString { versionPtr in
                    deflateInit2_(strm, Z.defaultCompression, Z.deflated, Z.rawWindowBits, Z.defaultMemLevel, Z.defaultStrategy, versionPtr, streamSize)
                }
            },
            step: { strm, flush in deflate(strm, flush) },
            end: { strm in deflateEnd(strm) }
        )
    }

    /// Inflates a raw DEFLATE stream.
    /// - Parameter data: The raw DEFLATE stream.
    /// - Returns: The decompressed bytes.
    /// - Throws: `Gzip.Error.decompressionFailed` if zlib reports an error, including
    ///   for empty or truncated input.
    static func decompressRawDeflate(data: Data) throws -> Data {
        let streamSize = Int32(MemoryLayout<ZStream>.stride)
        let version = zlibVersion

        return try pump(
            input: data,
            finalFlush: Z.noFlush,
            failure: .decompressionFailed,
            begin: { strm in
                version.withCString { versionPtr in
                    inflateInit2_(strm, Z.rawWindowBits, versionPtr, streamSize)
                }
            },
            step: { strm, flush in inflate(strm, flush) },
            end: { strm in inflateEnd(strm) }
        )
    }

    /// Drives a zlib stream over `input` until it reports `Z_STREAM_END`.
    ///
    /// The `z_stream` lives in heap memory for the whole operation: zlib (1.2.9 and later)
    /// rejects calls whose stream address differs from the one given at init, so passing
    /// a copy to `*End` would leave its internal state unreleased.
    ///
    /// - Parameters:
    ///   - input: Bytes to feed to zlib.
    ///   - finalFlush: Flush mode used once all of `input` has been handed over.
    ///   - failure: Error thrown when any zlib call fails.
    ///   - begin: Initialises the stream; returns a zlib status code.
    ///   - step: Performs one `deflate`/`inflate` call with the given flush mode.
    ///   - end: Releases zlib's internal state.
    private static func pump(
        input: Data,
        finalFlush: Int32,
        failure: Gzip.Error,
        begin: (UnsafeMutableRawPointer) -> Int32,
        step: (UnsafeMutableRawPointer, Int32) -> Int32,
        end: (UnsafeMutableRawPointer) -> Int32
    ) throws -> Data {
        let stream = UnsafeMutablePointer<ZStream>.allocate(capacity: 1)
        stream.initialize(to: ZStream())
        defer {
            stream.deinitialize(count: 1)
            stream.deallocate()
        }
        let handle = UnsafeMutableRawPointer(stream)

        guard begin(handle) == Z.ok else { throw failure }
        // Registered after the guard so it only runs for a successfully initialised
        // stream; defers run in reverse order, so this executes before deallocation.
        defer { _ = end(handle) }

        var output = Data()
        var buffer = [UInt8](repeating: 0, count: outputChunkSize)

        try input.withUnsafeBytes { (raw: UnsafeRawBufferPointer) throws -> Void in
            let total = raw.count
            var consumed = 0
            var status = Z.ok

            repeat {
                // `avail_in` is 32-bit, so hand larger inputs over in several pieces.
                if stream.pointee.avail_in == 0, consumed < total, let base = raw.baseAddress {
                    let length = min(total - consumed, Int(UInt32.max))
                    stream.pointee.next_in = UnsafeMutableRawPointer(mutating: base + consumed)
                    stream.pointee.avail_in = UInt32(length)
                    consumed += length
                }

                let flush = consumed == total ? finalFlush : Z.noFlush
                status = buffer.withUnsafeMutableBytes { out -> Int32 in
                    stream.pointee.next_out = out.baseAddress
                    stream.pointee.avail_out = UInt32(out.count)
                    return step(handle, flush)
                }

                let produced = buffer.count - Int(stream.pointee.avail_out)
                if produced > 0 {
                    output.append(contentsOf: buffer[..<produced])
                }

                // Anything other than Z_OK / Z_STREAM_END (e.g. Z_BUF_ERROR when no
                // progress is possible) is treated as a failure.
                guard status == Z.ok || status == Z.streamEnd else { throw failure }
            } while status != Z.streamEnd
        }

        return output
    }
}
| 275 | + |
/// Sequential reader over a `Data` value.
///
/// `offset` counts bytes consumed from the start of `data`, regardless of the
/// buffer's own `startIndex`, so slices can be parsed safely.
struct DataParser {
    let data: Data
    var offset = 0

    /// Reads one byte and advances by one.
    /// - Throws: `Gzip.Error.invalidData` if no bytes remain.
    mutating func readByte() throws -> UInt8 {
        guard offset >= 0, offset < data.count else { throw Gzip.Error.invalidData }
        let byte = data[data.startIndex + offset]
        offset += 1
        return byte
    }

    /// Reads `count` bytes as a zero-based copy and advances by `count`.
    /// - Throws: `Gzip.Error.invalidData` if `count` is negative or exceeds the remaining bytes.
    mutating func readBytes(count: Int) throws -> Data {
        // Compare against the remaining length so `offset + count` cannot overflow.
        guard count >= 0, offset >= 0, offset <= data.count, count <= data.count - offset else {
            throw Gzip.Error.invalidData
        }
        let start = data.startIndex + offset
        let chunk = data.subdata(in: start..<(start + count))
        offset += count
        return chunk
    }

    /// Reads a 16-bit little-endian unsigned integer and advances by two.
    /// - Throws: `Gzip.Error.invalidData` if fewer than two bytes remain.
    mutating func readUInt16() throws -> UInt16 {
        let bytes = try readBytes(count: 2)
        // Assemble byte by byte: independent of host endianness and of buffer alignment.
        let low = UInt16(bytes[bytes.startIndex])
        let high = UInt16(bytes[bytes.startIndex + 1])
        return low | (high << 8)
    }
}
0 commit comments