Skip to content

Commit 7dd5968

Browse files
authored
Merge pull request #3 from rickhohler/fix/expose-hashing-api
Expose Hashing API
2 parents 21f07e0 + 365da4c commit 7dd5968

10 files changed

Lines changed: 446 additions & 244 deletions

File tree

Package.swift

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ let package = Package(
2626
// Exclude hash/crypto types for WASM builds (they use NSLock)
2727
"Algorithms/DataStructures/BloomFilter.swift",
2828
"Algorithms/DataStructures/MerkleTree.swift",
29-
"Algorithms/Hashing/HashAlgorithm.swift",
3029
"Algorithms/WASMGuard.swift"
3130
]
3231
),

Sources/DesignAlgorithmsKit/Algorithms/Hashing/HashAlgorithm.swift

Lines changed: 85 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,88 +1,99 @@
1+
// DesignAlgorithmsKit
2+
// Hash Algorithm Types
13
//
2-
// HashAlgorithm.swift
3-
// DesignAlgorithmsKit
4-
//
5-
// Hash Algorithm Protocol - Base protocol for hash algorithms
6-
//
4+
// Hash Algorithm Policy:
5+
// - SHA-256: Recommended default for new hash generation
6+
// - SHA-1: Legacy support for existing systems
7+
// - MD5: Read-only legacy support (validation against companion files, existing checksums)
8+
// - CRC32: Fast checksum for quick integrity checks
79

8-
#if !os(WASI)
910
import Foundation
1011

11-
#if canImport(CryptoKit)
12-
import CryptoKit
13-
#endif
14-
15-
/// Protocol for hash algorithms
16-
public protocol HashAlgorithm {
17-
/// Algorithm name
18-
static var name: String { get }
12+
/// Hash algorithms supported for file and disk image hashing
13+
public enum HashAlgorithm: String, CaseIterable, Codable, Sendable {
14+
/// SHA-256: Recommended default for new hash generation (cryptographically secure)
15+
case sha256 = "sha256"
1916

20-
/// Hash data using this algorithm
21-
/// - Parameter data: Data to hash
22-
/// - Returns: Hash value as Data
23-
static func hash(data: Data) -> Data
17+
/// SHA-1: Legacy support for existing systems (deprecated; practical collision attacks exist, so do not rely on it for security)
18+
case sha1 = "sha1"
2419

25-
/// Hash a string using this algorithm
26-
/// - Parameter string: String to hash
27-
/// - Returns: Hash value as Data
28-
static func hash(string: String) -> Data
29-
}
30-
31-
extension HashAlgorithm {
32-
/// Default implementation for string hashing
33-
/// - Parameter string: String to hash
34-
/// - Returns: Hash value as Data, or empty Data if UTF-8 conversion fails
35-
/// - Note: UTF-8 conversion failure returns empty Data, which will hash to a valid hash value.
36-
/// This path is testable by creating strings that fail UTF-8 conversion (rare but possible).
37-
public static func hash(string: String) -> Data {
38-
guard let data = string.data(using: .utf8) else {
39-
// UTF-8 conversion failed - return hash of empty data
40-
// This is a valid fallback that ensures we always return a hash
41-
return hash(data: Data())
20+
/// MD5: Read-only legacy support for validation against companion files and existing checksums
21+
/// ⚠️ Do not use for new hash generation - use SHA-256 instead
22+
/// ✅ Supported for: reading companion checksum files (.md5, .md5sum), validating against existing MD5 hashes
23+
case md5 = "md5"
24+
25+
/// CRC32: Fast checksum for quick integrity checks (not cryptographic)
26+
case crc32 = "crc32"
27+
28+
public var displayName: String {
29+
rawValue.uppercased()
30+
}
31+
32+
/// Whether this algorithm is recommended for new hash generation
33+
public var isRecommendedForNewHashes: Bool {
34+
switch self {
35+
case .sha256:
36+
return true
37+
case .sha1, .md5, .crc32:
38+
return false
4239
}
43-
return hash(data: data)
4440
}
45-
}
46-
47-
/// SHA-256 hash algorithm
48-
public enum SHA256: HashAlgorithm {
49-
public static let name = "SHA-256"
5041

51-
public static func hash(data: Data) -> Data {
52-
#if canImport(CryptoKit)
53-
let digest = CryptoKit.SHA256.hash(data: data)
54-
return Data(digest)
55-
#else
56-
// Fallback implementation
57-
// In production, use CommonCrypto or another crypto library
58-
return fallbackHash(data: data)
59-
#endif
42+
/// Whether this algorithm is suitable for read-only validation (companion files, existing checksums)
43+
public var isSuitableForValidation: Bool {
44+
// All algorithms can be used for validation
45+
return true
6046
}
6147

62-
#if !canImport(CryptoKit)
63-
/// Fallback hash implementation (simple, not cryptographically secure)
64-
/// For production use, import CryptoKit or CommonCrypto
65-
/// - Note: This path is conditionally compiled and only available when CryptoKit is not available.
66-
/// It cannot be tested in environments where CryptoKit is available (like macOS/iOS test environments).
67-
/// The fallback implementation is intentionally simple and not cryptographically secure.
68-
private static func fallbackHash(data: Data) -> Data {
69-
var hash = Data(count: 32)
70-
data.withUnsafeBytes { dataBytes in
71-
hash.withUnsafeMutableBytes { hashBytes in
72-
// Simple hash (NOT cryptographically secure)
73-
// This is a placeholder - use CryptoKit in production
74-
for i in 0..<32 {
75-
var value: UInt8 = 0
76-
for j in 0..<dataBytes.count {
77-
value ^= dataBytes[j] &+ UInt8(i)
78-
}
79-
hashBytes[i] = value
80-
}
81-
}
48+
/// Hash size in bytes
49+
public var hashSize: Int {
50+
switch self {
51+
case .crc32:
52+
return 4
53+
case .md5:
54+
return 16
55+
case .sha1:
56+
return 20
57+
case .sha256:
58+
return 32
59+
}
60+
}
61+
62+
/// Whether this algorithm is suitable for small files (< 1MB)
63+
/// All algorithms are fast enough for small files, but SHA-256 is recommended
64+
public var isSuitableForSmallFiles: Bool {
65+
// All algorithms are suitable, but SHA-256 is recommended
66+
return true
67+
}
68+
69+
/// Recommended algorithm for small files
70+
public static var recommendedForSmallFiles: HashAlgorithm {
71+
return .sha256
72+
}
73+
74+
/// Recommended algorithm for millions of files
75+
/// SHA-256 is recommended up to 100M files (3.2 GB storage overhead)
76+
/// For > 100M files, consider two-stage approach (CRC32 filter + SHA-256 verification)
77+
public static var recommendedForMillionsOfFiles: HashAlgorithm {
78+
return .sha256
79+
}
80+
81+
/// Storage overhead in MB for N files
82+
public func storageOverheadMB(for fileCount: Int) -> Double {
83+
return (Double(hashSize) * Double(fileCount)) / (1024.0 * 1024.0)
84+
}
85+
86+
/// Whether this algorithm is suitable for millions of files
87+
/// SHA-256 is suitable up to ~100M files (3.2 GB storage)
88+
/// CRC32 has high collision risk for millions of files
89+
public var isSuitableForMillionsOfFiles: Bool {
90+
switch self {
91+
case .sha256, .sha1:
92+
return true // Secure, negligible collision risk
93+
case .md5:
94+
return true // Negligible collision risk, but cryptographically broken
95+
case .crc32:
96+
return false // High collision risk for millions of files
8297
}
83-
return hash
8498
}
85-
#endif
8699
}
87-
#endif
88-
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//
2+
// HashAlgorithmProtocol.swift
3+
// DesignAlgorithmsKit
4+
//
5+
// Hash Algorithm Protocol - Base protocol for hash algorithms
6+
//
7+
8+
import Foundation
9+
10+
#if canImport(CryptoKit)
11+
import CryptoKit
12+
#endif
13+
14+
/// Protocol for hash algorithms
15+
public protocol HashAlgorithmProtocol {
16+
/// Algorithm name
17+
static var name: String { get }
18+
19+
/// Hash data using this algorithm
20+
/// - Parameter data: Data to hash
21+
/// - Returns: Hash value as Data
22+
static func hash(data: Data) -> Data
23+
24+
/// Hash a string using this algorithm
25+
/// - Parameter string: String to hash
26+
/// - Returns: Hash value as Data
27+
static func hash(string: String) -> Data
28+
}
29+
30+
public extension HashAlgorithmProtocol {
31+
/// Default implementation for string hashing
32+
/// - Parameter string: String to hash
33+
/// - Returns: Hash value as Data, or the hash of empty Data if UTF-8 conversion fails
34+
/// - Note: If UTF-8 conversion fails, empty Data is hashed instead, so a valid hash value is always returned.
35+
/// This path is testable by creating strings that fail UTF-8 conversion (rare but possible).
36+
public static func hash(string: String) -> Data {
37+
guard let data = string.data(using: .utf8) else {
38+
// UTF-8 conversion failed - return hash of empty data
39+
// This is a valid fallback that ensures we always return a hash
40+
return hash(data: Data())
41+
}
42+
return hash(data: data)
43+
}
44+
}
45+
46+
/// SHA-256 hash algorithm
47+
public enum SHA256: HashAlgorithmProtocol {
48+
public static let name = "SHA-256"
49+
50+
public static func hash(data: Data) -> Data {
51+
#if canImport(CryptoKit)
52+
let digest = CryptoKit.SHA256.hash(data: data)
53+
return Data(digest)
54+
#else
55+
// Fallback implementation
56+
// In production, use CommonCrypto or another crypto library
57+
return fallbackHash(data: data)
58+
#endif
59+
}
60+
61+
#if !canImport(CryptoKit)
62+
/// Fallback hash implementation (simple, not cryptographically secure)
63+
/// For production use, import CryptoKit or CommonCrypto
64+
/// - Note: This path is conditionally compiled and only available when CryptoKit is not available.
65+
/// It cannot be tested in environments where CryptoKit is available (like macOS/iOS test environments).
66+
/// The fallback implementation is intentionally simple and not cryptographically secure.
67+
private static func fallbackHash(data: Data) -> Data {
68+
var hash = Data(count: 32)
69+
data.withUnsafeBytes { dataBytes in
70+
hash.withUnsafeMutableBytes { hashBytes in
71+
// Simple hash (NOT cryptographically secure)
72+
// This is a placeholder - use CryptoKit in production
73+
for i in 0..<32 {
74+
var value: UInt8 = 0
75+
for j in 0..<dataBytes.count {
76+
value ^= dataBytes[j] &+ UInt8(i)
77+
}
78+
hashBytes[i] = value
79+
}
80+
}
81+
}
82+
return hash
83+
}
84+
#endif
85+
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// DesignAlgorithmsKit
2+
// HashComputation
3+
//
4+
// This file provides a static helper for computing hashes using the Strategy Pattern.
5+
6+
import Foundation
7+
8+
/// Utility for computing hashes of data
9+
/// Delegates actual computation to registered HashStrategy implementations
10+
public struct HashComputation {
11+
12+
// Register default strategies on load
13+
// Note: In Swift, a static stored property is initialized lazily on its own first access, so `_setup` must be referenced explicitly to run the registration.
14+
private static let _setup: Void = {
15+
HashStrategyRegistry.register(SHA256Strategy.self)
16+
HashStrategyRegistry.register(SHA1Strategy.self)
17+
HashStrategyRegistry.register(MD5Strategy.self)
18+
HashStrategyRegistry.register(CRC32Strategy.self)
19+
}()
20+
21+
/// Compute hash for data using the specified algorithm
22+
/// - Parameters:
23+
/// - data: Data to hash
24+
/// - algorithm: Hash algorithm to use
25+
/// - Returns: Hash as Data
26+
/// - Throws: Error if algorithm is not implemented/registered
27+
public static func computeHash(data: Data, algorithm: HashAlgorithm) throws -> Data {
28+
// Ensure registration is done
29+
_ = _setup
30+
31+
guard let strategy = HashStrategyRegistry.strategy(for: algorithm) else {
32+
// No strategy is registered for this algorithm; report it with the generic hashing error.
33+
throw HashError.algorithmNotImplemented(algorithm.rawValue)
34+
}
35+
36+
return strategy.compute(data: data)
37+
}
38+
39+
/// Compute a hash using the algorithm identified by name and return it as a lowercase hex string
40+
/// Convenience method for code that uses string-based algorithm names
41+
/// - Parameters:
42+
/// - data: Data to hash
43+
/// - algorithm: Hash algorithm name (e.g., "sha256", "sha1", "md5")
44+
/// - Returns: Hash as hex string (lowercase, no separators)
45+
/// - Throws: Error if hashing fails or algorithm is unsupported
46+
public static func computeHashHex(data: Data, algorithm: String) throws -> String {
47+
guard let hashAlgorithm = HashAlgorithm(rawValue: algorithm.lowercased()) else {
48+
throw HashError.algorithmNotImplemented(algorithm)
49+
}
50+
return try computeHashHex(data: data, algorithm: hashAlgorithm)
51+
}
52+
53+
/// Compute CRC32 checksum
54+
/// - Parameter data: Data to checksum
55+
/// - Returns: CRC32 as Data (4 bytes, big-endian)
56+
public static func computeCRC32(data: Data) -> Data {
57+
let strategy = CRC32Strategy()
58+
return strategy.compute(data: data)
59+
}
60+
61+
/// Compute hash and return as lowercase hex string
62+
/// - Parameters:
63+
/// - data: Data to hash
64+
/// - algorithm: Hash algorithm
65+
/// - Returns: Hex string of the hash
66+
/// - Throws: Error if algorithm is not implemented
67+
public static func computeHashHex(data: Data, algorithm: HashAlgorithm) throws -> String {
68+
let hashData = try computeHash(data: data, algorithm: algorithm)
69+
return hashData.map { String(format: "%02x", $0) }.joined()
70+
}
71+
}
72+
73+
/// Generic error for hashing failures
74+
public enum HashError: Error {
75+
case algorithmNotImplemented(String)
76+
}

0 commit comments

Comments
 (0)