Skip to content

Commit 22cdcee

Browse files
committed
Update DSL concatenation to coalesce characters
1 parent 11b886a commit 22cdcee

File tree

4 files changed

+85
-14
lines changed

4 files changed

+85
-14
lines changed

Sources/_StringProcessing/Regex/Core.swift

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -223,10 +223,6 @@ extension Regex {
223223

224224
@available(SwiftStdlib 5.7, *)
225225
extension Regex {
226-
// var root: DSLTree.Node {
227-
// program.tree.root
228-
// }
229-
230226
var list: DSLList {
231227
program.list
232228
}
@@ -245,23 +241,34 @@ extension Regex {
245241
return Regex<T>(list: list)
246242
}
247243

248-
func appending<T>(contentsOf node: some Collection<DSLTree.Node>) -> Regex<T> {
244+
func appending<T>(contentsOf node: [DSLTree.Node]) -> Regex<T> {
249245
var list = program.list
250246
list.append(contentsOf: node)
251247
return Regex<T>(list: list)
252248
}
253249

254-
func concatenating<T>(_ other: some Collection<DSLTree.Node>) -> Regex<T> {
255-
var nodes = program.list.nodes
256-
switch nodes[0] {
250+
func concatenating<T>(_ other: DSLList) -> Regex<T> {
251+
// TODO: Quick check to see if these copies are necessary?
252+
var list = program.list
253+
var other = other
254+
list.coalesce(withFirstAtomIn: &other)
255+
256+
// Sometimes coalescing consumes all of `other`
257+
guard !other.nodes.isEmpty else {
258+
return Regex<T>(list: list)
259+
}
260+
261+
// Use an existing concatenation if it's already the root;
262+
// otherwise, embed self and other in a new concatenation root.
263+
switch list.nodes[0] {
257264
case .concatenation(let children):
258-
nodes[0] = .concatenation(Array(repeating: TEMP_FAKE_NODE, count: children.count + 1))
259-
nodes.append(contentsOf: other)
265+
list.nodes[0] = .concatenation(Array(repeating: TEMP_FAKE_NODE, count: children.count + 1))
266+
list.nodes.append(contentsOf: other.nodes)
260267
default:
261-
nodes.insert(.concatenation(Array(repeating: TEMP_FAKE_NODE, count: 2)), at: 0)
262-
nodes.append(contentsOf: other)
268+
list.nodes.insert(.concatenation(Array(repeating: TEMP_FAKE_NODE, count: 2)), at: 0)
269+
list.nodes.append(contentsOf: other.nodes)
263270
}
264-
return Regex<T>(list: DSLList(nodes))
271+
return Regex<T>(list: list)
265272
}
266273

267274
func alternating<T>(with other: some Collection<DSLTree.Node>) -> Regex<T> {

Sources/_StringProcessing/Regex/DSLList.swift

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,4 +164,58 @@ extension DSLList {
164164
break
165165
}
166166
}
167+
168+
/// Returns the index of a node that can take part in literal coalescing,
/// searching the subtree rooted at `position`.
///
/// The search walks into concatenations (following the first child, or the
/// last child when `findLast` is `true`) and steps through
/// `.ignoreCapturesInTypedOutput` / `.limitCaptureNesting` wrappers. It
/// stops at the first node that is not one of those.
///
/// - Parameters:
///   - position: Index in `nodes` of the subtree root to inspect.
///   - findLast: When `true`, follow the last child of each concatenation
///     instead of the first.
/// - Returns: The index of an `.atom` whose `literalCharacterValue` is
///   non-nil, or of a `.quotedLiteral`; `nil` if the node reached is
///   anything else, if `position` is out of range, or if an empty
///   concatenation is encountered.
func indexOfCoalescableAtom(startingAt position: Int, findLast: Bool = false) -> Int? {
  // Nothing to inspect (e.g. an empty list): report "not coalescable"
  // instead of trapping on the subscript below.
  guard nodes.indices.contains(position) else { return nil }

  switch nodes[position] {
  case .concatenation(let children):
    // A concatenation without children has no first/last child. Without
    // this check `0..<(children.count - 1)` would form the invalid range
    // `0..<-1` and trap, and `position + 1` would refer to a node that is
    // not a child of this concatenation.
    guard !children.isEmpty else { return nil }

    var position = position + 1
    if findLast {
      // Step over every child but the last.
      // NOTE(review): relies on `skipNode` leaving `position` on the final
      // node of the skipped subtree, so the `+= 1` lands on the next
      // sibling — confirm against `skipNode`'s definition.
      for _ in 0..<(children.count - 1) {
        skipNode(&position)
        position += 1
      }
    }
    return indexOfCoalescableAtom(startingAt: position, findLast: findLast)
  case .ignoreCapturesInTypedOutput, .limitCaptureNesting:
    // The wrapped child is the node immediately following the wrapper.
    return indexOfCoalescableAtom(startingAt: position + 1, findLast: findLast)
  case .atom(let atom):
    if atom.literalCharacterValue != nil {
      return position
    }
  case .quotedLiteral:
    return position
  default:
    break
  }
  return nil
}
192+
193+
/// Merges the trailing literal of this list with the leading literal of
/// `other`, when both exist.
///
/// On success the trailing node in `self` is replaced by a single
/// `.quotedLiteral` holding both strings, and the leading node is removed
/// from `other`. Any `.limitCaptureNesting` / `.ignoreCapturesInTypedOutput`
/// nodes directly preceding the removed node are removed as well, and the
/// nearest preceding `.concatenation` (if reached) has its child count
/// reduced by one. If either side has no coalescable node, neither list is
/// modified.
///
/// - Parameter other: The right-hand list; mutated in place when a merge
///   happens and may end up with no nodes at all.
mutating func coalesce(withFirstAtomIn other: inout DSLList) {
  // Locate the last coalescable node on the left and the first on the right.
  guard let lhsIndex = indexOfCoalescableAtom(startingAt: 0, findLast: true) else {
    return
  }
  guard let rhsIndex = other.indexOfCoalescableAtom(startingAt: 0) else {
    return
  }
  guard let lhsText = nodes[lhsIndex].literalStringValue,
        let rhsText = other.nodes[rhsIndex].literalStringValue
  else {
    return
  }

  // The left-hand node absorbs the right-hand literal.
  nodes[lhsIndex] = .quotedLiteral(lhsText + rhsText)

  // Drop the absorbed node, then walk backwards repairing its ancestors.
  other.nodes.remove(at: rhsIndex)
  var cursor = rhsIndex
  while cursor > 0 {
    cursor -= 1
    switch other.nodes[cursor] {
    case .limitCaptureNesting, .ignoreCapturesInTypedOutput:
      // A wrapper whose only content was the removed node goes away too.
      other.nodes.remove(at: cursor)
    case .concatenation(let children):
      // The enclosing concatenation lost one child; nothing above it changes.
      other.nodes[cursor] = .concatenation(
        Array(repeating: .empty, count: children.count - 1))
      return
    default:
      return
    }
  }
}
167221
}

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,16 @@ extension DSLTree.Atom {
514514
}
515515
}
516516

517+
extension DSLTree.Node {
  /// The literal text this node stands for, if it is a literal.
  ///
  /// Yields the stored string for `.quotedLiteral`, the atom's
  /// `literalCharacterValue` converted to a `String` for `.atom` (or `nil`
  /// when the atom has no such value), and `nil` for every other node.
  var literalStringValue: String? {
    if case .quotedLiteral(let text) = self {
      return text
    }
    guard case .atom(let atom) = self,
          let character = atom.literalCharacterValue
    else {
      return nil
    }
    return String(character)
  }
}
526+
517527
extension DSLTree {
518528
struct Options {
519529
// TBD

Sources/_StringProcessing/Utility/RegexFactory.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ public struct _RegexFactory {
3636
_ left: some RegexComponent,
3737
_ right: some RegexComponent
3838
) -> Regex<Output> {
39-
left.regex.concatenating(right.regex.program.list.nodes)
39+
left.regex.concatenating(right.regex.program.list)
4040
}
4141

4242
@available(SwiftStdlib 5.7, *)

0 commit comments

Comments
 (0)