|
| 1 | +import Foundation |
| 2 | +import SwiftSoup |
| 3 | + |
/// Extracts interactive elements (currently only image galleries) from the
/// HTML of a reader post.
public enum ReaderPostParser {
    /// An element of a post that the reader can present interactively.
    public enum InteractiveElement: Sendable {
        case gallery(Gallery)
    }

    /// The images found inside a single gallery container, in markup order.
    public struct Gallery: Sendable {
        public let images: [GalleryImage]
    }

    /// A single `<img>` found inside a gallery container.
    public struct GalleryImage: Sendable {
        /// URL from the `src` attribute (displayed, possibly resized).
        public let src: URL
        /// Full-resolution URL from `data-orig-file`.
        public let originalFileURL: URL?
        /// Original dimensions from `data-orig-size` (e.g. "4032,3024").
        public let originalSize: CGSize?
        /// All srcset variants with their width descriptors.
        public let srcset: [SrcsetEntry]
        /// Plain text from `data-image-description`.
        public let description: String?
        /// Plain text from `data-image-caption`.
        public let caption: String?
    }

    /// One `srcset` candidate that carries a width (`w`) descriptor.
    public struct SrcsetEntry: Sendable {
        public let url: URL
        public let width: Int
    }

    /// CSS selectors identifying the supported gallery containers.
    private static let gallerySelectors = [
        "figure.wp-block-gallery",
        "div.wp-block-gallery",
        "figure.wp-block-jetpack-tiled-gallery",
        "div.wp-block-jetpack-tiled-gallery",
        "div.tiled-gallery",
        "div.gallery"
    ]

    /// Parses post HTML and returns interactive elements (galleries).
    ///
    /// Galleries are returned in the order they appear in the document. A
    /// gallery container nested inside another matched container is not
    /// reported separately; its images belong to the outermost gallery.
    ///
    /// - Parameter html: The post body markup.
    /// - Returns: One `.gallery` element per outermost gallery container that
    ///   holds at least one parsable image. Empty when the HTML cannot be
    ///   parsed or contains no galleries.
    public static func parse(_ html: String) -> [InteractiveElement] {
        // A single grouped query visits every candidate exactly once and in
        // document order, instead of grouping results by selector.
        let groupedSelector = gallerySelectors.joined(separator: ", ")
        guard let document = try? SwiftSoup.parse(html),
              let containers = try? document.select(groupedSelector) else {
            return []
        }

        var elements: [InteractiveElement] = []
        var claimed = Set<ObjectIdentifier>()

        for container in containers {
            // Skip containers that live inside a gallery we already handled,
            // so their images are not reported a second time.
            let isNested = container.parents().contains { claimed.contains(ObjectIdentifier($0)) }
            guard !isNested else { continue }
            claimed.insert(ObjectIdentifier(container))

            let images = parseImages(from: container)
            if !images.isEmpty {
                elements.append(.gallery(Gallery(images: images)))
            }
        }

        return elements
    }

    /// Collects every parsable `<img>` below `container`.
    private static func parseImages(from container: Element) -> [GalleryImage] {
        guard let imgElements = try? container.select("img") else {
            return []
        }
        return imgElements.compactMap { parseImage(from: $0) }
    }

    /// Builds a `GalleryImage` from an `<img>` element.
    ///
    /// - Returns: `nil` when the element has no usable `src`; every other
    ///   attribute is optional.
    private static func parseImage(from img: Element) -> GalleryImage? {
        guard let src = attribute("src", of: img).flatMap({ URL(string: $0) }) else {
            return nil
        }

        return GalleryImage(
            src: src,
            originalFileURL: attribute("data-orig-file", of: img).flatMap { URL(string: $0) },
            originalSize: attribute("data-orig-size", of: img).flatMap(parseSize),
            srcset: attribute("srcset", of: img).map(parseSrcset) ?? [],
            description: attribute("data-image-description", of: img).flatMap(plainText),
            caption: attribute("data-image-caption", of: img).flatMap(plainText)
        )
    }

    /// Returns the trimmed value of attribute `name`, or `nil` when the
    /// attribute cannot be read or is blank.
    private static func attribute(_ name: String, of element: Element) -> String? {
        guard let value = try? element.attr(name) else { return nil }
        return value.trimmingCharacters(in: .whitespacesAndNewlines).nilIfEmpty
    }

    /// Reduces an HTML fragment to its text content.
    ///
    /// The text is read from the parsed fragment rather than produced with
    /// `SwiftSoup.clean`, because `clean` serialises its result back to HTML,
    /// which leaves characters such as `&` entity-escaped in the output.
    ///
    /// - Returns: The text, or `nil` when the fragment contains no text.
    private static func plainText(_ html: String) -> String? {
        guard let text = try? SwiftSoup.parse(html).text() else { return nil }
        return text.nilIfEmpty
    }

    /// Parses "W,H" format (e.g. "4032,3024") into CGSize.
    ///
    /// - Returns: `nil` unless both components are finite numbers greater
    ///   than zero, so callers never receive a size that is unusable for
    ///   aspect-ratio calculations.
    private static func parseSize(_ value: String) -> CGSize? {
        let parts = value.split(separator: ",")
        guard parts.count == 2,
              let width = Double(parts[0].trimmingCharacters(in: .whitespaces)),
              let height = Double(parts[1].trimmingCharacters(in: .whitespaces)),
              width.isFinite, height.isFinite,
              width > 0, height > 0 else {
            return nil
        }
        return CGSize(width: width, height: height)
    }

    /// Parses srcset string (e.g. "url1 300w, url2 600w") into entries.
    ///
    /// Tokenising follows the shape of the HTML srcset grammar: a URL is a run
    /// of non-whitespace characters, so commas *inside* a URL are preserved,
    /// and candidates may be separated by any whitespace, including newlines.
    /// Only candidates with exactly one positive width (`w`) descriptor are
    /// returned; density (`x`) candidates and bare URLs are skipped.
    private static func parseSrcset(_ value: String) -> [SrcsetEntry] {
        var entries: [SrcsetEntry] = []
        var remainder = Substring(value)

        while true {
            // Skip separators left over from the previous candidate.
            remainder = remainder.drop(while: { $0.isWhitespace || $0 == "," })
            guard !remainder.isEmpty else { break }

            // The URL is everything up to the next whitespace character.
            let rawURL = remainder.prefix(while: { !$0.isWhitespace })
            remainder = remainder[rawURL.endIndex...]

            let urlText: Substring
            let descriptorText: Substring
            if rawURL.last == "," {
                // A comma glued to the end of the URL terminates the candidate
                // without any descriptor.
                let trailingCommas = rawURL.reversed().prefix(while: { $0 == "," }).count
                urlText = rawURL.dropLast(trailingCommas)
                descriptorText = ""
            } else {
                urlText = rawURL
                descriptorText = remainder.prefix(while: { $0 != "," })
                remainder = remainder[descriptorText.endIndex...]
            }

            let descriptors = descriptorText.split(whereSeparator: \.isWhitespace)
            guard descriptors.count == 1,
                  let descriptor = descriptors.first,
                  descriptor.last == "w",
                  let width = Int(descriptor.dropLast()),
                  width > 0,
                  let url = URL(string: String(urlText)) else {
                continue
            }
            entries.append(SrcsetEntry(url: url, width: width))
        }

        return entries
    }
}
| 142 | + |
private extension String {
    /// The string itself, or `nil` when it contains no characters.
    var nilIfEmpty: String? {
        guard !isEmpty else { return nil }
        return self
    }
}
0 commit comments