From 549a36f74d34e96fddba35fb209b38873c6489f0 Mon Sep 17 00:00:00 2001 From: Ngo Quoc Dat Date: Mon, 1 Jun 2026 22:49:50 +0700 Subject: [PATCH] fix(import): make JSONImportPluginTests link by extracting pure parsing into a shared file (#1536) --- .../JSONImportPlugin/JSONImportParsing.swift | 182 ++++++++++++++++++ .../JSONImportPlugin/JSONImportPlugin.swift | 181 +---------------- TablePro.xcodeproj/project.pbxproj | 8 + .../Plugins/JSONImportPluginTests.swift | 51 +++-- 4 files changed, 221 insertions(+), 201 deletions(-) create mode 100644 Plugins/JSONImportPlugin/JSONImportParsing.swift diff --git a/Plugins/JSONImportPlugin/JSONImportParsing.swift b/Plugins/JSONImportPlugin/JSONImportParsing.swift new file mode 100644 index 000000000..bc7d7df10 --- /dev/null +++ b/Plugins/JSONImportPlugin/JSONImportParsing.swift @@ -0,0 +1,182 @@ +// +// JSONImportParsing.swift +// JSONImportPlugin +// +// Pure JSON parsing, row extraction, and field inference. Kept free of the +// plugin's loadable-bundle and SwiftUI surface so it can be compiled into the +// test target directly (a loadable .tableplugin cannot be linked by tests). +// + +import Foundation +import TableProPluginKit + +enum JSONImportParsing { + static func isLineDelimited(_ url: URL) -> Bool { + ["jsonl", "ndjson"].contains(url.pathExtension.lowercased()) + } + + static func parseRow(fromLine line: String) throws -> [String: PluginCellValue] { + let object = try JSONSerialization.jsonObject(with: Data(line.utf8)) + guard let dict = object as? [String: Any] else { + throw PluginImportError.importFailed("Each line must be a JSON object") + } + return convertRow(dict) + } + + static func parseRows(at url: URL, targetTable: String?) throws -> [[String: Any]] { + let data = try Data(contentsOf: url) + let object = try JSONSerialization.jsonObject(with: data) + return try extractRows(from: object, targetTable: targetTable) + } + + static func extractRows(from object: Any, targetTable: String?) throws -> [[String: Any]] { + if let array = object as? [Any] { + return array.compactMap { $0 as? [String: Any] } + } + + guard let dict = object as? [String: Any] else { + throw PluginImportError.importFailed("Expected a JSON array of objects or a table-keyed object") + } + + let tables = dict.compactMapValues { value -> [Any]? in + guard let array = value as? [Any] else { return nil } + return array.allSatisfy { $0 is [String: Any] } ? array : nil + } + let isTableWrapper = !tables.isEmpty && tables.count == dict.count + + guard isTableWrapper else { + return [dict] + } + + if let targetTable, let match = matchTable(in: tables, to: targetTable) { + return match.compactMap { $0 as? [String: Any] } + } + if tables.count == 1, let only = tables.values.first { + return only.compactMap { $0 as? [String: Any] } + } + throw PluginImportError.importFailed("The file contains multiple tables and none matches the target table") + } + + private static func matchTable(in tables: [String: [Any]], to target: String) -> [Any]? { + if let exact = tables.first(where: { $0.key.caseInsensitiveCompare(target) == .orderedSame }) { + return exact.value + } + let suffix = tables.first { key, _ in + key.split(separator: ".").last.map { $0.caseInsensitiveCompare(target) == .orderedSame } ?? false + } + return suffix?.value + } + + static func convertRow(_ row: [String: Any]) -> [String: PluginCellValue] { + row.mapValues(cellValue(from:)) + } + + static func cellValue(from json: Any) -> PluginCellValue { + switch json { + case is NSNull: + return .null + case let number as NSNumber: + if CFGetTypeID(number) == CFBooleanGetTypeID() { + return .text(number.boolValue ? "true" : "false") + } + return .text(number.stringValue) + case let string as String: + return .text(string) + default: + return .text(serialize(json)) + } + } + + private static func serialize(_ object: Any) -> String { + guard JSONSerialization.isValidJSONObject(object), + let data = try? JSONSerialization.data(withJSONObject: object, options: [.sortedKeys]), + let string = String(data: data, encoding: .utf8) + else { + return String(describing: object) + } + return string + } + + // MARK: - Source introspection + + static func sampleRawRows(at url: URL, targetTable: String?, limit: Int) throws -> [[String: Any]] { + if isLineDelimited(url) { + let handle = try FileHandle(forReadingFrom: url) + defer { try? handle.close() } + let text = String(bytes: handle.readData(ofLength: 256 * 1_024), encoding: .utf8) ?? "" + var rows: [[String: Any]] = [] + for line in text.split(separator: "\n") where rows.count < limit { + let trimmed = line.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { continue } + if let object = try? JSONSerialization.jsonObject(with: Data(trimmed.utf8)) as? [String: Any] { + rows.append(object) + } + } + return rows + } + let object = try JSONSerialization.jsonObject(with: Data(contentsOf: url)) + return Array(try extractRows(from: object, targetTable: targetTable).prefix(limit)) + } + + static func detectFields(in rows: [[String: Any]]) -> [PluginImportField] { + var names: [String] = [] + var seen = Set() + var valuesByField: [String: [Any]] = [:] + for row in rows { + for (key, value) in row { + if seen.insert(key).inserted { names.append(key) } + valuesByField[key, default: []].append(value) + } + } + return names.sorted().map { name in + let nonNull = (valuesByField[name] ?? []).filter { !($0 is NSNull) } + return PluginImportField( + name: name, + sampleValue: nonNull.first.map(sampleString), + inferredType: inferType(from: nonNull) + ) + } + } + + static func inferType(from values: [Any]) -> PluginImportFieldType { + guard !values.isEmpty else { return .text } + var allNested = true + var allBoolean = true + var allInteger = true + var allNumber = true + for value in values { + if value is [Any] || value is [String: Any] { + allBoolean = false + allInteger = false + allNumber = false + } else { + allNested = false + if let number = value as? NSNumber { + if CFGetTypeID(number) == CFBooleanGetTypeID() { + allInteger = false + allNumber = false + } else { + allBoolean = false + if CFNumberIsFloatType(number) { allInteger = false } + } + } else { + allBoolean = false + allInteger = false + allNumber = false + } + } + } + if allNested { return .json } + if allBoolean { return .boolean } + if allInteger { return .integer } + if allNumber { return .real } + return .text + } + + private static func sampleString(_ value: Any) -> String { + switch cellValue(from: value) { + case .text(let string): return String(string.prefix(80)) + case .bytes, .null: return "" + } + } +} diff --git a/Plugins/JSONImportPlugin/JSONImportPlugin.swift b/Plugins/JSONImportPlugin/JSONImportPlugin.swift index 2f5be7019..5ac86569a 100644 --- a/Plugins/JSONImportPlugin/JSONImportPlugin.swift +++ b/Plugins/JSONImportPlugin/JSONImportPlugin.swift @@ -58,22 +58,22 @@ final class JSONImportPlugin: ImportFormatPlugin, SettablePlugin { try await sink.beginTransaction() } - if Self.isLineDelimited(url) { + if JSONImportParsing.isLineDelimited(url) { var lineNumber = 0 for try await line in url.lines { try progress.checkCancellation() lineNumber += 1 let trimmed = line.trimmingCharacters(in: .whitespacesAndNewlines) guard !trimmed.isEmpty else { continue } - let row = try Self.parseRow(fromLine: trimmed) + let row = try JSONImportParsing.parseRow(fromLine: trimmed) try await insert(row, into: sink, at: lineNumber, progress: progress, inserted: &inserted, skipped: &skipped, errors: &errors, maxErrors: maxErrors) } } else { - let rawRows = try Self.parseRows(at: url, targetTable: sink.targetTable) + let rawRows = try JSONImportParsing.parseRows(at: url, targetTable: sink.targetTable) for (index, rawRow) in rawRows.enumerated() { try progress.checkCancellation() - try await insert(Self.convertRow(rawRow), into: sink, at: index + 1, progress: progress, + try await insert(JSONImportParsing.convertRow(rawRow), into: sink, at: index + 1, progress: progress, inserted: &inserted, skipped: &skipped, errors: &errors, maxErrors: maxErrors) } } @@ -131,179 +131,10 @@ final class JSONImportPlugin: ImportFormatPlugin, SettablePlugin { } } - // MARK: - Parsing - - static func isLineDelimited(_ url: URL) -> Bool { - ["jsonl", "ndjson"].contains(url.pathExtension.lowercased()) - } - - static func parseRow(fromLine line: String) throws -> [String: PluginCellValue] { - let object = try JSONSerialization.jsonObject(with: Data(line.utf8)) - guard let dict = object as? [String: Any] else { - throw PluginImportError.importFailed("Each line must be a JSON object") - } - return convertRow(dict) - } - - static func parseRows(at url: URL, targetTable: String?) throws -> [[String: Any]] { - let data = try Data(contentsOf: url) - let object = try JSONSerialization.jsonObject(with: data) - return try extractRows(from: object, targetTable: targetTable) - } - - static func extractRows(from object: Any, targetTable: String?) throws -> [[String: Any]] { - if let array = object as? [Any] { - return array.compactMap { $0 as? [String: Any] } - } - - guard let dict = object as? [String: Any] else { - throw PluginImportError.importFailed("Expected a JSON array of objects or a table-keyed object") - } - - let tables = dict.compactMapValues { value -> [Any]? in - guard let array = value as? [Any] else { return nil } - return array.allSatisfy { $0 is [String: Any] } ? array : nil - } - let isTableWrapper = !tables.isEmpty && tables.count == dict.count - - guard isTableWrapper else { - return [dict] - } - - if let targetTable, let match = matchTable(in: tables, to: targetTable) { - return match.compactMap { $0 as? [String: Any] } - } - if tables.count == 1, let only = tables.values.first { - return only.compactMap { $0 as? [String: Any] } - } - throw PluginImportError.importFailed("The file contains multiple tables and none matches the target table") - } - - private static func matchTable(in tables: [String: [Any]], to target: String) -> [Any]? { - if let exact = tables.first(where: { $0.key.caseInsensitiveCompare(target) == .orderedSame }) { - return exact.value - } - let suffix = tables.first { key, _ in - key.split(separator: ".").last.map { $0.caseInsensitiveCompare(target) == .orderedSame } ?? false - } - return suffix?.value - } - - static func convertRow(_ row: [String: Any]) -> [String: PluginCellValue] { - row.mapValues(cellValue(from:)) - } - - static func cellValue(from json: Any) -> PluginCellValue { - switch json { - case is NSNull: - return .null - case let number as NSNumber: - if CFGetTypeID(number) == CFBooleanGetTypeID() { - return .text(number.boolValue ? "true" : "false") - } - return .text(number.stringValue) - case let string as String: - return .text(string) - default: - return .text(serialize(json)) - } - } - - private static func serialize(_ object: Any) -> String { - guard JSONSerialization.isValidJSONObject(object), - let data = try? JSONSerialization.data(withJSONObject: object, options: [.sortedKeys]), - let string = String(data: data, encoding: .utf8) - else { - return String(describing: object) - } - return string - } - // MARK: - Source introspection func detectSourceFields(at url: URL, targetTable: String?) throws -> [PluginImportField] { - let rows = try Self.sampleRawRows(at: url, targetTable: targetTable, limit: 200) - return Self.detectFields(in: rows) - } - - static func sampleRawRows(at url: URL, targetTable: String?, limit: Int) throws -> [[String: Any]] { - if isLineDelimited(url) { - let handle = try FileHandle(forReadingFrom: url) - defer { try? handle.close() } - let text = String(bytes: handle.readData(ofLength: 256 * 1_024), encoding: .utf8) ?? "" - var rows: [[String: Any]] = [] - for line in text.split(separator: "\n") where rows.count < limit { - let trimmed = line.trimmingCharacters(in: .whitespacesAndNewlines) - guard !trimmed.isEmpty else { continue } - if let object = try? JSONSerialization.jsonObject(with: Data(trimmed.utf8)) as? [String: Any] { - rows.append(object) - } - } - return rows - } - let object = try JSONSerialization.jsonObject(with: Data(contentsOf: url)) - return Array(try extractRows(from: object, targetTable: targetTable).prefix(limit)) - } - - static func detectFields(in rows: [[String: Any]]) -> [PluginImportField] { - var names: [String] = [] - var seen = Set() - var valuesByField: [String: [Any]] = [:] - for row in rows { - for (key, value) in row { - if seen.insert(key).inserted { names.append(key) } - valuesByField[key, default: []].append(value) - } - } - return names.sorted().map { name in - let nonNull = (valuesByField[name] ?? []).filter { !($0 is NSNull) } - return PluginImportField( - name: name, - sampleValue: nonNull.first.map(sampleString), - inferredType: inferType(from: nonNull) - ) - } - } - - static func inferType(from values: [Any]) -> PluginImportFieldType { - guard !values.isEmpty else { return .text } - var allNested = true - var allBoolean = true - var allInteger = true - var allNumber = true - for value in values { - if value is [Any] || value is [String: Any] { - allBoolean = false - allInteger = false - allNumber = false - } else { - allNested = false - if let number = value as? NSNumber { - if CFGetTypeID(number) == CFBooleanGetTypeID() { - allInteger = false - allNumber = false - } else { - allBoolean = false - if CFNumberIsFloatType(number) { allInteger = false } - } - } else { - allBoolean = false - allInteger = false - allNumber = false - } - } - } - if allNested { return .json } - if allBoolean { return .boolean } - if allInteger { return .integer } - if allNumber { return .real } - return .text - } - - private static func sampleString(_ value: Any) -> String { - switch cellValue(from: value) { - case .text(let string): return String(string.prefix(80)) - case .bytes, .null: return "" - } + let rows = try JSONImportParsing.sampleRawRows(at: url, targetTable: targetTable, limit: 200) + return JSONImportParsing.detectFields(in: rows) } } diff --git a/TablePro.xcodeproj/project.pbxproj b/TablePro.xcodeproj/project.pbxproj index 144b5100f..f768fad5f 100644 --- a/TablePro.xcodeproj/project.pbxproj +++ b/TablePro.xcodeproj/project.pbxproj @@ -498,6 +498,13 @@ ); target = 5A86F001000000000 /* JSONImport */; }; + 5A86F002900000000 /* Exceptions for "Plugins/JSONImportPlugin" folder in "TableProTests" target */ = { + isa = PBXFileSystemSynchronizedBuildFileExceptionSet; + membershipExceptions = ( + JSONImportParsing.swift, + ); + target = 5ABCC5A62F43856700EAF3FC /* TableProTests */; + }; 5A87A000900000000 /* Exceptions for "Plugins/CassandraDriverPlugin" folder in "CassandraDriver" target */ = { isa = PBXFileSystemSynchronizedBuildFileExceptionSet; membershipExceptions = ( @@ -686,6 +693,7 @@ isa = PBXFileSystemSynchronizedRootGroup; exceptions = ( 5A86F001900000000 /* Exceptions for "Plugins/JSONImportPlugin" folder in "JSONImport" target */, + 5A86F002900000000 /* Exceptions for "Plugins/JSONImportPlugin" folder in "TableProTests" target */, ); path = Plugins/JSONImportPlugin; sourceTree = ""; diff --git a/TableProTests/Plugins/JSONImportPluginTests.swift b/TableProTests/Plugins/JSONImportPluginTests.swift index 93fea57fb..5257f5b8e 100644 --- a/TableProTests/Plugins/JSONImportPluginTests.swift +++ b/TableProTests/Plugins/JSONImportPluginTests.swift @@ -4,7 +4,6 @@ // import Foundation -@testable import JSONImport import TableProPluginKit import Testing @@ -27,64 +26,64 @@ struct JSONImportPluginTests { @Test("Null converts to a SQL null cell") func testNullValue() { - #expect(JSONImportPlugin.cellValue(from: NSNull()) == .null) + #expect(JSONImportParsing.cellValue(from: NSNull()) == .null) } @Test("Booleans convert to true/false text, not 1/0") func testBooleanValue() throws { - #expect(JSONImportPlugin.cellValue(from: try field(#"{"yes": true}"#, "yes")) == .text("true")) - #expect(JSONImportPlugin.cellValue(from: try field(#"{"no": false}"#, "no")) == .text("false")) + #expect(JSONImportParsing.cellValue(from: try field(#"{"yes": true}"#, "yes")) == .text("true")) + #expect(JSONImportParsing.cellValue(from: try field(#"{"no": false}"#, "no")) == .text("false")) } @Test("Numbers convert to their text form") func testNumberValues() throws { - #expect(JSONImportPlugin.cellValue(from: try field(#"{"i": 42}"#, "i")) == .text("42")) - #expect(JSONImportPlugin.cellValue(from: try field(#"{"d": 3.5}"#, "d")) == .text("3.5")) - #expect(JSONImportPlugin.cellValue(from: try field(#"{"big": 9007199254740993}"#, "big")) == .text("9007199254740993")) + #expect(JSONImportParsing.cellValue(from: try field(#"{"i": 42}"#, "i")) == .text("42")) + #expect(JSONImportParsing.cellValue(from: try field(#"{"d": 3.5}"#, "d")) == .text("3.5")) + #expect(JSONImportParsing.cellValue(from: try field(#"{"big": 9007199254740993}"#, "big")) == .text("9007199254740993")) } @Test("Strings pass through unchanged") func testStringValue() { - #expect(JSONImportPlugin.cellValue(from: "hello") == .text("hello")) + #expect(JSONImportParsing.cellValue(from: "hello") == .text("hello")) } @Test("Nested objects and arrays serialize to JSON text") func testNestedValue() throws { - #expect(JSONImportPlugin.cellValue(from: try field(#"{"tags": ["a", "b"]}"#, "tags")) == .text("[\"a\",\"b\"]")) - #expect(JSONImportPlugin.cellValue(from: try field(#"{"meta": {"k": 1}}"#, "meta")) == .text("{\"k\":1}")) + #expect(JSONImportParsing.cellValue(from: try field(#"{"tags": ["a", "b"]}"#, "tags")) == .text("[\"a\",\"b\"]")) + #expect(JSONImportParsing.cellValue(from: try field(#"{"meta": {"k": 1}}"#, "meta")) == .text("{\"k\":1}")) } // MARK: - Row extraction @Test("Bare array of objects yields rows") func testBareArray() throws { - let rows = try JSONImportPlugin.extractRows(from: try anyValue("[{\"id\":1},{\"id\":2}]"), targetTable: nil) + let rows = try JSONImportParsing.extractRows(from: try anyValue("[{\"id\":1},{\"id\":2}]"), targetTable: nil) #expect(rows.count == 2) } @Test("Single-key table wrapper yields that table's rows") func testSingleKeyWrapper() throws { - let rows = try JSONImportPlugin.extractRows(from: try anyValue(#"{"users":[{"id":1}]}"#), targetTable: nil) + let rows = try JSONImportParsing.extractRows(from: try anyValue(#"{"users":[{"id":1}]}"#), targetTable: nil) #expect(rows.count == 1) } @Test("Multi-table wrapper selects the array matching the target table") func testMultiTableWrapperMatchesTarget() throws { let json = #"{"users":[{"id":1}],"orders":[{"id":1},{"id":2}]}"# - let rows = try JSONImportPlugin.extractRows(from: try anyValue(json), targetTable: "orders") + let rows = try JSONImportParsing.extractRows(from: try anyValue(json), targetTable: "orders") #expect(rows.count == 2) } @Test("Schema-qualified wrapper key matches the unqualified target table") func testQualifiedKeyMatch() throws { - let rows = try JSONImportPlugin.extractRows(from: try anyValue(#"{"public.users":[{"id":1}]}"#), targetTable: "users") + let rows = try JSONImportParsing.extractRows(from: try anyValue(#"{"public.users":[{"id":1}]}"#), targetTable: "users") #expect(rows.count == 1) } @Test("Multi-table wrapper with no match throws") func testMultiTableNoMatchThrows() { #expect(throws: PluginImportError.self) { - _ = try JSONImportPlugin.extractRows( + _ = try JSONImportParsing.extractRows( from: try anyValue(#"{"users":[{"id":1}],"orders":[{"id":1}]}"#), targetTable: "products" ) @@ -93,7 +92,7 @@ struct JSONImportPluginTests { @Test("A lone JSON object is treated as a single row") func testSingleObjectRow() throws { - let rows = try JSONImportPlugin.extractRows(from: try anyValue(#"{"id":1,"tags":["a"]}"#), targetTable: nil) + let rows = try JSONImportParsing.extractRows(from: try anyValue(#"{"id":1,"tags":["a"]}"#), targetTable: nil) #expect(rows.count == 1) #expect(rows[0]["id"] != nil) } @@ -102,7 +101,7 @@ struct JSONImportPluginTests { @Test("A JSON object line parses to a row") func testNdjsonLine() throws { - let row = try JSONImportPlugin.parseRow(fromLine: #"{"id":1,"name":"x"}"#) + let row = try JSONImportParsing.parseRow(fromLine: #"{"id":1,"name":"x"}"#) #expect(row["id"] == .text("1")) #expect(row["name"] == .text("x")) } @@ -110,7 +109,7 @@ struct JSONImportPluginTests { @Test("A non-object line throws") func testNdjsonNonObjectThrows() { #expect(throws: PluginImportError.self) { - _ = try JSONImportPlugin.parseRow(fromLine: "[1, 2, 3]") + _ = try JSONImportParsing.parseRow(fromLine: "[1, 2, 3]") } } @@ -118,7 +117,7 @@ struct JSONImportPluginTests { @Test("Rows shaped like JSONExportPlugin output convert losslessly") func testExportShapeRoundTrip() throws { - let row = JSONImportPlugin.convertRow( + let row = JSONImportParsing.convertRow( try object(#"{"id":1,"name":"Alice","deleted_at":null,"score":3.14,"active":true}"#) ) #expect(row["id"] == .text("1")) @@ -136,39 +135,39 @@ struct JSONImportPluginTests { @Test("Inference: all integers") func testInferInteger() throws { - #expect(JSONImportPlugin.inferType(from: try array("[1, 2, 3]")) == .integer) + #expect(JSONImportParsing.inferType(from: try array("[1, 2, 3]")) == .integer) } @Test("Inference: any decimal makes the field real") func testInferReal() throws { - #expect(JSONImportPlugin.inferType(from: try array("[1, 2.5, 3]")) == .real) + #expect(JSONImportParsing.inferType(from: try array("[1, 2.5, 3]")) == .real) } @Test("Inference: all booleans") func testInferBoolean() throws { - #expect(JSONImportPlugin.inferType(from: try array("[true, false]")) == .boolean) + #expect(JSONImportParsing.inferType(from: try array("[true, false]")) == .boolean) } @Test("Inference: all-nested values are json") func testInferJSON() throws { - #expect(JSONImportPlugin.inferType(from: try array(#"[{"a":1}, [1,2]]"#)) == .json) + #expect(JSONImportParsing.inferType(from: try array(#"[{"a":1}, [1,2]]"#)) == .json) } @Test("Inference: mixed types fall back to text") func testInferText() throws { - #expect(JSONImportPlugin.inferType(from: try array(#"["a", 1]"#)) == .text) + #expect(JSONImportParsing.inferType(from: try array(#"["a", 1]"#)) == .text) } @Test("Inference: empty values are text") func testInferEmpty() { - #expect(JSONImportPlugin.inferType(from: []) == .text) + #expect(JSONImportParsing.inferType(from: []) == .text) } @Test("detectFields reports sorted fields with inferred types and a sample") func testDetectFields() throws { let raw = #"[{"id":1,"name":"a","active":true},{"id":2,"name":"b","active":false}]"# let rows = try #require(try JSONSerialization.jsonObject(with: Data(raw.utf8)) as? [[String: Any]]) - let fields = JSONImportPlugin.detectFields(in: rows) + let fields = JSONImportParsing.detectFields(in: rows) #expect(fields.map(\.name) == ["active", "id", "name"]) #expect(fields.first { $0.name == "id" }?.inferredType == .integer) #expect(fields.first { $0.name == "active" }?.inferredType == .boolean)