diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1ccd5af..aa63ab0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,11 +6,11 @@ All notable changes to this project will be documented in this file.
### Fixed
- **Move markup Word compatibility (Issue #96)** - Documents with move operations no longer cause Word "unreadable content" warnings
- - Added `SimplifyMoveMarkup` setting to convert native move markup (`w:moveFrom`/`w:moveTo`) to simple `w:del`/`w:ins`
- - Changed `DetectMoves` default from `true` to `false` until the underlying ID collision bug is fixed in Phase II
- - Root cause identified: `FixUpRevMarkIds()` was overwriting IDs of `w:del`/`w:ins` after `FixUpRevisionIds()` had assigned unique IDs, causing collisions with move element IDs
- - Users who want move detection with Word compatibility should set both `DetectMoves = true` and `SimplifyMoveMarkup = true`
- - Trade-off: With `SimplifyMoveMarkup = true`, users lose the visual "moved" distinction (green double-underline) but get guaranteed Word compatibility
+ - Root cause: `FixUpRevMarkIds()` was overwriting IDs of `w:del`/`w:ins` after `FixUpRevisionIds()` had already assigned unique IDs, causing collisions with move element IDs
+ - Fix: Removed redundant `FixUpRevMarkIds()` call - `FixUpRevisionIds()` already handles all revision element IDs correctly
+ - Added `SimplifyMoveMarkup` setting to optionally convert move markup to simple `w:del`/`w:ins` if desired
+ - Added comprehensive ID uniqueness tests to prevent regression
+ - `DetectMoves` now defaults to `true` (move detection is safe to use)
- **Footnote/endnote numbering** - Fixed footnotes and endnotes displaying raw XML IDs instead of sequential display numbers
- Per ECMA-376, `w:id` is a reference identifier, not the display number
- Added `FootnoteNumberingTracker` class to scan document and build XML ID → display number mapping
diff --git a/CLAUDE.md b/CLAUDE.md
index 0ed021c..4d13e3c 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -139,17 +139,12 @@ DocumentBuilder.BuildDocument(sources, outputPath);
- `AuthorForRevisions` - Author name for tracked changes
- `DetailThreshold` - 0.0-1.0, lower = more detailed comparison (default: 0.15)
- `CaseInsensitive` - Case-insensitive comparison
-- `DetectMoves` - Enable move detection in `GetRevisions()` (default: false - see warning below)
-- `SimplifyMoveMarkup` - Convert move markup to del/ins for Word compatibility (default: false)
+- `DetectMoves` - Enable move detection in `GetRevisions()` (default: true)
+- `SimplifyMoveMarkup` - Convert move markup to del/ins (default: false)
- `MoveSimilarityThreshold` - Jaccard similarity threshold for moves (default: 0.8)
- `MoveMinimumWordCount` - Minimum words for move detection (default: 3)
- `DetectFormatChanges` - Enable format change detection (default: true)
-**WARNING: Move Detection Known Issue (Issue #96)** - Move markup can cause Word to display "unreadable content" warnings due to an ID collision bug. Until Phase II of the fix is complete:
-- `DetectMoves` defaults to `false` to avoid the issue
-- If you need move detection, set both `DetectMoves = true` AND `SimplifyMoveMarkup = true`
-- With `SimplifyMoveMarkup = true`, moves are converted to regular del/ins (loses green move styling but ensures Word compatibility)
-
Move detection produces **native Word move markup** (`w:moveFrom`/`w:moveTo`) when `DetectMoves` is enabled:
- The comparer analyzes deleted/inserted content blocks for similarity after LCS comparison
- Matching pairs (≥80% Jaccard similarity by default) are converted to move markup
diff --git a/Docxodus.Tests/WmlComparerMoveDetectionTests.cs b/Docxodus.Tests/WmlComparerMoveDetectionTests.cs
index 97317b9..fce2808 100644
--- a/Docxodus.Tests/WmlComparerMoveDetectionTests.cs
+++ b/Docxodus.Tests/WmlComparerMoveDetectionTests.cs
@@ -8,6 +8,7 @@
using System.Xml.Linq;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
+using DocumentFormat.OpenXml.Validation;
using DocumentFormat.OpenXml.Wordprocessing;
using Docxodus;
using Xunit;
@@ -1175,13 +1176,13 @@ public void SimplifyMoveMarkup_WhenFalse_ShouldPreserveMoveElements()
}
///
- /// Verifies that DetectMoves defaults to false for safety.
+ /// Verifies that DetectMoves defaults to true.
///
[Fact]
- public void DetectMoves_ShouldDefaultToFalse()
+ public void DetectMoves_ShouldDefaultToTrue()
{
var settings = new WmlComparerSettings();
- Assert.False(settings.DetectMoves, "DetectMoves should default to false until Phase II fix is complete");
+ Assert.True(settings.DetectMoves, "DetectMoves should default to true");
}
///
@@ -1195,5 +1196,413 @@ public void SimplifyMoveMarkup_ShouldDefaultToFalse()
}
#endregion
+
+ #region ID Uniqueness Tests (Issue #96 Phase II)
+
+        /// <summary>
+        /// Verifies that no revision ID is over-shared across the document when moves are present.
+        /// This is the core test for Issue #96 - duplicate IDs cause Word "unreadable content" warnings.
+        /// </summary>
+        /// <remarks>
+        /// A move range start/end pair shares one ID by design, so an ID may legitimately occur twice.
+        /// NOTE(review): the "at most twice" rule also tolerates two unrelated elements sharing an ID;
+        /// <see cref="MoveMarkup_WithMixedChanges_ShouldHaveUniqueIds"/> applies the stricter
+        /// "exactly once" rule to ins/del/moveFrom/moveTo.
+        /// </remarks>
+        [Fact]
+        public void MoveMarkup_AllRevisionIdsShouldBeUnique()
+        {
+            // Arrange: A and B swap places (the move), C is edited in place, D is untouched.
+            var doc1 = CreateDocumentWithParagraphs(
+                "This is paragraph A with enough words for move detection.",
+                "This is paragraph B with sufficient content here.",
+                "This is paragraph C that stays in place.",
+                "This is paragraph D with additional content."
+            );
+            var doc2 = CreateDocumentWithParagraphs(
+                "This is paragraph B with sufficient content here.",
+                "This is paragraph A with enough words for move detection.",
+                "This is paragraph C that stays in place but modified slightly.",
+                "This is paragraph D with additional content."
+            );
+
+            var settings = new WmlComparerSettings
+            {
+                DetectMoves = true,
+                SimplifyMoveMarkup = false,
+                MoveSimilarityThreshold = 0.8,
+                MoveMinimumWordCount = 3
+            };
+
+            // Act
+            var compared = WmlComparer.Compare(doc1, doc2, settings);
+
+            // Extract all revision IDs from all content parts
+            using var stream = new MemoryStream(compared.DocumentByteArray);
+            using var doc = WordprocessingDocument.Open(stream, false);
+
+            XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
+            var revisionElements = new[] { "ins", "del", "moveFrom", "moveTo",
+                "moveFromRangeStart", "moveFromRangeEnd", "moveToRangeStart", "moveToRangeEnd", "rPrChange" };
+
+            var allIds = new List<string>();
+
+            // Appends every non-null w:id found on a revision element of the given part XML.
+            void CollectRevisionIds(XDocument partXml)
+            {
+                foreach (var elemName in revisionElements)
+                {
+                    allIds.AddRange(partXml.Descendants(w + elemName)
+                        .Select(e => e.Attribute(w + "id")?.Value)
+                        .Where(id => id != null));
+                }
+            }
+
+            var mainPart = doc.MainDocumentPart;
+
+            // Check main document
+            CollectRevisionIds(mainPart.GetXDocument());
+
+            // Check footnotes if present
+            if (mainPart.FootnotesPart != null)
+            {
+                CollectRevisionIds(mainPart.FootnotesPart.GetXDocument());
+            }
+
+            // Check endnotes if present
+            if (mainPart.EndnotesPart != null)
+            {
+                CollectRevisionIds(mainPart.EndnotesPart.GetXDocument());
+            }
+
+            // Assert: an ID may appear at most twice (a range start/end pair shares its ID by design);
+            // three or more occurrences always means a collision.
+            var duplicates = allIds.GroupBy(x => x)
+                .Where(g => g.Count() > 2) // Allow pairs (start/end) but not more
+                .Select(g => new { Id = g.Key, Count = g.Count() })
+                .ToList();
+
+            Assert.True(duplicates.Count == 0,
+                $"Found revision IDs used more than twice (only range pairs should share IDs): " +
+                $"{string.Join(", ", duplicates.Select(d => $"id={d.Id} count={d.Count}"))}");
+        }
+
+        /// <summary>
+        /// Verifies that move names properly pair moveFrom and moveTo elements.
+        /// The multiset of w:name values on moveFromRangeStart must equal the multiset on moveToRangeStart.
+        /// Note: Consecutive paragraphs may be grouped as a single move block.
+        /// </summary>
+        [Fact]
+        public void MoveMarkup_MoveNamesShouldProperlyPairSourceAndDestination()
+        {
+            // Arrange: two paragraphs that simply swap places
+            var doc1 = CreateDocumentWithParagraphs(
+                "This is paragraph A with enough words for move detection.",
+                "This is paragraph B with sufficient content here."
+            );
+            var doc2 = CreateDocumentWithParagraphs(
+                "This is paragraph B with sufficient content here.",
+                "This is paragraph A with enough words for move detection."
+            );
+
+            var settings = new WmlComparerSettings
+            {
+                DetectMoves = true,
+                SimplifyMoveMarkup = false,
+                MoveSimilarityThreshold = 0.8,
+                MoveMinimumWordCount = 3
+            };
+
+            // Act
+            var compared = WmlComparer.Compare(doc1, doc2, settings);
+
+            // Extract move names
+            using var stream = new MemoryStream(compared.DocumentByteArray);
+            using var doc = WordprocessingDocument.Open(stream, false);
+
+            XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
+            var mainXDoc = doc.MainDocumentPart.GetXDocument();
+
+            var moveFromNames = mainXDoc.Descendants(w + "moveFromRangeStart")
+                .Select(e => e.Attribute(w + "name")?.Value)
+                .Where(n => n != null)
+                .ToList();
+
+            var moveToNames = mainXDoc.Descendants(w + "moveToRangeStart")
+                .Select(e => e.Attribute(w + "name")?.Value)
+                .Where(n => n != null)
+                .ToList();
+
+            // Assert: Should have at least one move detected
+            Assert.True(moveFromNames.Count > 0, "Expected at least one moveFromRangeStart with w:name");
+            Assert.True(moveToNames.Count > 0, "Expected at least one moveToRangeStart with w:name");
+
+            // Assert: moveFrom and moveTo names should match (same names, same count).
+            // Names are identifiers, so sort and compare them ordinally rather than by current culture.
+            var sortedFrom = moveFromNames.OrderBy(x => x, StringComparer.Ordinal);
+            var sortedTo = moveToNames.OrderBy(x => x, StringComparer.Ordinal);
+            Assert.True(sortedFrom.SequenceEqual(sortedTo, StringComparer.Ordinal),
+                $"Move names should match between moveFrom and moveTo. " +
+                $"From: [{string.Join(", ", moveFromNames)}], To: [{string.Join(", ", moveToNames)}]");
+
+            // Assert: No empty or null move names
+            Assert.DoesNotContain("", moveFromNames);
+            Assert.DoesNotContain("", moveToNames);
+            Assert.True(moveFromNames.All(n => n.StartsWith("move", StringComparison.Ordinal)),
+                "All move names should follow the 'moveN' pattern");
+        }
+
+        /// <summary>
+        /// Checks that ins/del/moveFrom/moveTo elements never share a w:id when a comparison
+        /// produces moves alongside ordinary insertions and deletions - the exact mix that
+        /// triggered Issue #96.
+        /// </summary>
+        [Fact]
+        public void MoveMarkup_WithMixedChanges_ShouldHaveUniqueIds()
+        {
+            // Arrange: one paragraph moves, one is edited, one is deleted, one is new
+            var original = CreateDocumentWithParagraphs(
+                "This paragraph will be moved to a new location.",
+                "This paragraph stays but will be modified here.",
+                "This paragraph will be deleted entirely from doc.",
+                "This is static content that does not change."
+            );
+            var revised = CreateDocumentWithParagraphs(
+                "This paragraph stays but has been changed now.",
+                "This is static content that does not change.",
+                "This paragraph will be moved to a new location.",
+                "This is a completely new paragraph inserted."
+            );
+
+            var comparerSettings = new WmlComparerSettings
+            {
+                DetectMoves = true,
+                SimplifyMoveMarkup = false,
+                MoveSimilarityThreshold = 0.8,
+                MoveMinimumWordCount = 3
+            };
+
+            // Act
+            var result = WmlComparer.Compare(original, revised, comparerSettings);
+
+            // Read the main document part of the comparison output
+            using var resultStream = new MemoryStream(result.DocumentByteArray);
+            using var resultDoc = WordprocessingDocument.Open(resultStream, false);
+
+            XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
+            var bodyXml = resultDoc.MainDocumentPart.GetXDocument();
+
+            // Gather the w:id of every non-range revision element, in ins, del, moveFrom, moveTo order
+            var nonRangeIds = new[] { "ins", "del", "moveFrom", "moveTo" }
+                .SelectMany(localName => bodyXml.Descendants(w + localName))
+                .Select(element => element.Attribute(w + "id")?.Value)
+                .Where(value => value != null)
+                .ToList();
+
+            // Any ID seen more than once is a collision
+            var duplicates = nonRangeIds
+                .GroupBy(value => value)
+                .Where(group => group.Count() > 1)
+                .ToList();
+
+            Assert.True(duplicates.Count == 0,
+                $"Found duplicate IDs among ins/del/moveFrom/moveTo elements: " +
+                $"{string.Join(", ", duplicates.Select(g => $"id={g.Key}"))}. " +
+                $"This is the Issue #96 bug - FixUpRevMarkIds was overwriting IDs.");
+        }
+
+ #endregion
+
+ #region Stress Tests (Issue #96)
+
+        /// <summary>
+        /// Generates a paragraph with unique content for stress testing.
+        /// </summary>
+        /// <param name="index">
+        /// Paragraph number. Selects the template (index modulo the template count) and is embedded
+        /// in the text together with a reference token derived from it.
+        /// </param>
+        /// <returns>
+        /// The formatted paragraph text. The same <paramref name="index"/> always yields the same
+        /// text, so documents built from it are reproducible between runs.
+        /// </returns>
+        private static string GenerateStressTestParagraph(int index)
+        {
+            var templates = new[]
+            {
+                "Paragraph {0}: This document section contains important information about the project requirements and specifications. Reference ID: {1}",
+                "Section {0}: The following content describes the technical implementation details for the proposed system architecture. Doc: {1}",
+                "Item {0}: According to the agreement dated herein, the parties shall comply with all terms and conditions specified. Contract: {1}",
+                "Clause {0}: The licensee agrees to use the software only for purposes permitted under this license agreement. License: {1}",
+                "Article {0}: This paragraph establishes the fundamental principles governing the relationship between the entities. Ref: {1}",
+                "Point {0}: The data processing activities shall be conducted in accordance with applicable privacy regulations. GDPR: {1}",
+                "Note {0}: All modifications to this document must be tracked and approved by the designated review committee. Rev: {1}",
+                "Entry {0}: The financial statements have been prepared in accordance with generally accepted accounting principles. GAAP: {1}",
+                "Record {0}: This memorandum summarizes the key decisions made during the executive committee meeting. Minutes: {1}",
+                "Statement {0}: The undersigned hereby certifies that all information provided is true and accurate. Cert: {1}",
+                "Provision {0}: Notwithstanding the foregoing, the obligations set forth herein shall survive termination. Legal: {1}",
+                "Stipulation {0}: The contractor shall deliver all work products by the specified deadline. Deadline: {1}",
+                "Requirement {0}: The system shall support concurrent users and maintain response times under load. Perf: {1}",
+                "Specification {0}: All API endpoints must implement proper authentication and authorization. Security: {1}",
+                "Definition {0}: For purposes of this agreement, the following terms shall have the meanings ascribed. Terms: {1}",
+            };
+
+            var template = templates[index % templates.Length];
+
+            // Derive the 8-hex-digit token from the index (multiplicative hash) instead of a random
+            // GUID: it still looks arbitrary, but the generated text is identical on every run.
+            uint token = unchecked((uint)index * 2654435761u);
+
+            return string.Format(template, index, $"DOC-{index:D4}-{token:X8}");
+        }
+
+        /// <summary>
+        /// Stress test for Issue #96: Validates that revision IDs remain unique even with
+        /// dozens of moves and hundreds of other changes. The edit script (which paragraphs are
+        /// moved, deleted, inserted, modified) is driven by a fixed-seed RNG for reproducibility.
+        /// </summary>
+        /// <param name="paragraphCount">Number of paragraphs in the original document.</param>
+        /// <param name="moveCount">Upper bound on the number of paragraphs relocated.</param>
+        /// <param name="changeCount">Budget for other edits, split evenly across deletions, insertions and modifications.</param>
+        /// <param name="testName">Label used only in assertion failure messages.</param>
+        [Theory]
+        [InlineData(50, 15, 30, "Small")]     // 50 paragraphs, ~15 moves, ~30 other changes
+        [InlineData(100, 25, 50, "Medium")]   // 100 paragraphs, ~25 moves, ~50 other changes
+        [InlineData(200, 40, 100, "Large")]   // 200 paragraphs, ~40 moves, ~100 other changes
+        public void StressTest_ManyMovesAndChanges_ShouldHaveUniqueIds(
+            int paragraphCount, int moveCount, int changeCount, string testName)
+        {
+            // Arrange: Use fixed seed for reproducibility
+            var rng = new Random(42);
+
+            // Generate original document with numbered paragraphs
+            var originalParagraphs = Enumerable.Range(1, paragraphCount)
+                .Select(i => GenerateStressTestParagraph(i))
+                .ToList();
+
+            // Create modified version with moves and changes
+            var modifiedParagraphs = new List<string>(originalParagraphs);
+
+            // Apply moves: pick random paragraphs and move them to new positions.
+            // availableForMove tracks the current indices of paragraphs that have not been moved yet.
+            var availableForMove = Enumerable.Range(0, modifiedParagraphs.Count).ToList();
+            for (int i = 0; i < moveCount && availableForMove.Count > 2; i++)
+            {
+                int fromIdx = availableForMove[rng.Next(availableForMove.Count)];
+                availableForMove.Remove(fromIdx);
+
+                var para = modifiedParagraphs[fromIdx];
+                modifiedParagraphs.RemoveAt(fromIdx);
+
+                // Adjust available indices after removal
+                availableForMove = availableForMove.Select(x => x > fromIdx ? x - 1 : x).ToList();
+
+                int toIdx = rng.Next(modifiedParagraphs.Count + 1);
+                modifiedParagraphs.Insert(toIdx, para);
+
+                // Adjust available indices after insertion
+                availableForMove = availableForMove.Select(x => x >= toIdx ? x + 1 : x).ToList();
+            }
+
+            // Apply deletions (never shrink below half of the original size)
+            int deleteCount = changeCount / 3;
+            for (int i = 0; i < deleteCount && modifiedParagraphs.Count > paragraphCount / 2; i++)
+            {
+                int idx = rng.Next(modifiedParagraphs.Count);
+                modifiedParagraphs.RemoveAt(idx);
+            }
+
+            // Apply insertions. The reference suffix is derived from the loop counter rather than
+            // a random GUID so the inserted text is the same on every run.
+            int insertCount = changeCount / 3;
+            for (int i = 0; i < insertCount; i++)
+            {
+                int idx = rng.Next(modifiedParagraphs.Count + 1);
+                modifiedParagraphs.Insert(idx, $"[NEW-{i + 1}] This is a newly inserted paragraph with enough words to be meaningful. " +
+                    $"It contains various content including technical terms, legal jargon, and general prose. " +
+                    $"The purpose is to test the comparison engine with substantial insertions. Reference: INS-{i + 1:D4}");
+            }
+
+            // Apply modifications (change words in existing paragraphs)
+            int modifyCount = changeCount / 3;
+            for (int i = 0; i < modifyCount && modifiedParagraphs.Count > 0; i++)
+            {
+                int idx = rng.Next(modifiedParagraphs.Count);
+                var para = modifiedParagraphs[idx];
+                para = para.Replace("paragraph", "section")
+                    .Replace("content", "material")
+                    .Replace("document", "file");
+                if (!para.Contains("[MODIFIED]"))
+                {
+                    para = "[MODIFIED] " + para;
+                }
+                modifiedParagraphs[idx] = para;
+            }
+
+            // Create documents
+            var doc1 = CreateDocumentWithParagraphs(originalParagraphs.ToArray());
+            var doc2 = CreateDocumentWithParagraphs(modifiedParagraphs.ToArray());
+
+            var settings = new WmlComparerSettings
+            {
+                DetectMoves = true,
+                SimplifyMoveMarkup = false,
+                MoveSimilarityThreshold = 0.75,
+                MoveMinimumWordCount = 5,
+                AuthorForRevisions = "StressTest"
+            };
+
+            // Act
+            var compared = WmlComparer.Compare(doc1, doc2, settings);
+
+            // Assert: Analyze results
+            using var stream = new MemoryStream(compared.DocumentByteArray);
+            using var wDoc = WordprocessingDocument.Open(stream, false);
+            var mainXDoc = wDoc.MainDocumentPart.GetXDocument();
+
+            XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
+
+            // Collect all revision IDs, remembering which element type carried each one
+            var revisionElements = new[] { "ins", "del", "moveFrom", "moveTo", "rPrChange" };
+            var allIds = new List<(string Id, string Type)>();
+
+            foreach (var elemName in revisionElements)
+            {
+                foreach (var elem in mainXDoc.Descendants(w + elemName))
+                {
+                    var id = elem.Attribute(w + "id")?.Value;
+                    if (id != null)
+                    {
+                        allIds.Add((id, elemName));
+                    }
+                }
+            }
+
+            // Check for duplicates - THE CRITICAL TEST
+            var duplicates = allIds.GroupBy(x => x.Id)
+                .Where(g => g.Count() > 1)
+                .ToList();
+
+            Assert.True(duplicates.Count == 0,
+                $"StressTest {testName}: Found {duplicates.Count} duplicate revision IDs. " +
+                $"First duplicates: {string.Join(", ", duplicates.Take(5).Select(d => $"id={d.Key}:[{string.Join(",", d.Select(x => x.Type))}]"))}. " +
+                $"Total elements: {allIds.Count}");
+
+            // Check move name pairing
+            var moveFromNames = mainXDoc.Descendants(w + "moveFromRangeStart")
+                .Select(e => e.Attribute(w + "name")?.Value)
+                .Where(n => !string.IsNullOrEmpty(n))
+                .ToList();
+
+            var moveToNames = mainXDoc.Descendants(w + "moveToRangeStart")
+                .Select(e => e.Attribute(w + "name")?.Value)
+                .Where(n => !string.IsNullOrEmpty(n))
+                .ToList();
+
+            // Validate all names are paired (set difference in both directions)
+            var unpairedFrom = moveFromNames.Except(moveToNames).ToList();
+            var unpairedTo = moveToNames.Except(moveFromNames).ToList();
+
+            Assert.True(!unpairedFrom.Any() && !unpairedTo.Any(),
+                $"StressTest {testName}: Unpaired move names found. " +
+                $"From without To: [{string.Join(", ", unpairedFrom)}], " +
+                $"To without From: [{string.Join(", ", unpairedTo)}]");
+
+            // OpenXML validation
+            var validator = new OpenXmlValidator(FileFormatVersions.Office2019);
+            var errors = validator.Validate(wDoc).ToList();
+
+            // Note: Some validation errors may be acceptable (e.g., missing optional parts)
+            // We focus on ensuring no critical structural errors
+            var criticalErrors = errors
+                .Where(e => e.ErrorType == ValidationErrorType.Schema)
+                .ToList();
+
+            Assert.True(criticalErrors.Count == 0,
+                $"StressTest {testName}: OpenXML schema validation failed with {criticalErrors.Count} errors. " +
+                $"First errors: {string.Join("; ", criticalErrors.Take(3).Select(e => e.Description))}");
+        }
+
+ #endregion
}
}
diff --git a/Docxodus/WmlComparer.cs b/Docxodus/WmlComparer.cs
index 2301a6f..c77309d 100644
--- a/Docxodus/WmlComparer.cs
+++ b/Docxodus/WmlComparer.cs
@@ -63,23 +63,17 @@ public class WmlComparerSettings
public DirectoryInfo DebugTempFileDi;
///
- /// Whether to detect and mark moved content in GetRevisions(). Default: false.
+ /// Whether to detect and mark moved content in GetRevisions(). Default: true.
/// When enabled, deletion/insertion pairs with similar text are marked as moves
/// using native w:moveFrom/w:moveTo markup.
- ///
- /// WARNING: Move markup can cause Word to display "unreadable content" warnings
- /// due to a known ID collision bug (Issue #96). Until this is fixed in Phase II,
- /// it is recommended to either keep this false, or set SimplifyMoveMarkup = true
- /// when enabling move detection.
///
- public bool DetectMoves = false;
+ public bool DetectMoves = true;
///
/// When true, converts native move markup (w:moveFrom/w:moveTo) to simple
- /// delete/insert markup (w:del/w:ins) after comparison. This ensures Word
- /// compatibility at the cost of losing the visual "moved" distinction.
+ /// delete/insert markup (w:del/w:ins) after comparison. This trades the
+ /// visual "moved" distinction for simpler markup.
///
- /// Use this setting when DetectMoves = true but Word compatibility is required.
/// Default: false.
///
public bool SimplifyMoveMarkup = false;
@@ -1854,7 +1848,8 @@ private static WmlDocument ProduceDocumentWithTrackedRevisions(WmlComparerSettin
wDocWithRevisions.MainDocumentPart.PutXDocument();
FixUpFootnotesEndnotesWithCustomMarkers(wDocWithRevisions);
- FixUpRevMarkIds(wDocWithRevisions);
+ // Note: FixUpRevMarkIds was removed here - it was causing ID collisions with move
+ // elements (Issue #96). FixUpRevisionIds already handles all revision IDs properly.
// Convert move markup to simple del/ins if requested (Issue #96 workaround)
// This runs after all ID fixups to ensure proper conversion
diff --git a/TestFiles/Issue96/.gitignore b/TestFiles/Issue96/.gitignore
new file mode 100644
index 0000000..e050548
--- /dev/null
+++ b/TestFiles/Issue96/.gitignore
@@ -0,0 +1 @@
+*.docx
diff --git a/TestFiles/Issue96/Issue96BugReproduction.cs b/TestFiles/Issue96/Issue96BugReproduction.cs
new file mode 100644
index 0000000..2a6cba3
--- /dev/null
+++ b/TestFiles/Issue96/Issue96BugReproduction.cs
@@ -0,0 +1,253 @@
+// Issue #96 Bug Reproduction Test
+// This specifically tests the scenario that caused the "unreadable content" warning:
+// Move operations combined with regular ins/del that would have caused ID collisions
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Xml.Linq;
+using DocumentFormat.OpenXml;
+using DocumentFormat.OpenXml.Packaging;
+using DocumentFormat.OpenXml.Wordprocessing;
+using DocumentFormat.OpenXml.Validation;
+using Docxodus;
+
+class Issue96BugReproduction
+{
+ static XNamespace W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
+
+    /// <summary>
+    /// Compares two in-memory documents that produce both a move and a regular delete/insert,
+    /// writes the comparison result next to the assembly, and prints a report on revision ID
+    /// uniqueness, move-name pairing and OpenXML validation.
+    /// Sets <see cref="Environment.ExitCode"/> to 1 when duplicate revision IDs are found.
+    /// </summary>
+    /// <param name="args">Command-line arguments (unused).</param>
+    static void Main(string[] args)
+    {
+        Console.WriteLine("╔══════════════════════════════════════════════════════════════╗");
+        Console.WriteLine("║ Issue #96 Bug Reproduction Test ║");
+        Console.WriteLine("║ Move operations + ins/del that caused ID collisions ║");
+        Console.WriteLine("╚══════════════════════════════════════════════════════════════╝\n");
+
+        var outputDir = Path.GetDirectoryName(typeof(Issue96BugReproduction).Assembly.Location)
+            ?? Directory.GetCurrentDirectory();
+
+        // Create documents that will produce BOTH moves AND regular ins/del
+        // This is the scenario that triggered the bug
+        // The first paragraph is IDENTICAL in both docs but at different positions (MOVE)
+        // The last paragraph is completely different (DEL + INS)
+        var doc1 = CreateDocument(new[] {
+            "The quick brown fox jumps over the lazy sleeping dog in the park today.",
+            "Static content that does not change at all in this document test.",
+            "This paragraph will be deleted and replaced with something new."
+        });
+
+        var doc2 = CreateDocument(new[] {
+            "Static content that does not change at all in this document test.",
+            "The quick brown fox jumps over the lazy sleeping dog in the park today.",
+            "This is a completely new paragraph that was inserted here instead."
+        });
+
+        // Keep this listing in step with the three-paragraph documents created above.
+        Console.WriteLine("Document 1 (Original):");
+        Console.WriteLine(" [1] The quick brown fox jumps over the lazy sleeping dog...");
+        Console.WriteLine(" [2] Static content that does not change...");
+        Console.WriteLine(" [3] This paragraph will be deleted and replaced...");
+        Console.WriteLine();
+        Console.WriteLine("Document 2 (Modified):");
+        Console.WriteLine(" [1] Static content that does not change...");
+        Console.WriteLine(" [2] The quick brown fox jumps over the lazy sleeping dog... (MOVED)");
+        Console.WriteLine(" [3] This is a completely new paragraph... (REPLACED)");
+        Console.WriteLine();
+
+        var settings = new WmlComparerSettings
+        {
+            DetectMoves = true,
+            SimplifyMoveMarkup = false,
+            MoveSimilarityThreshold = 0.8,
+            MoveMinimumWordCount = 3,
+            AuthorForRevisions = "Issue96Test"
+        };
+
+        Console.WriteLine("Comparing with DetectMoves=true, SimplifyMoveMarkup=false...\n");
+
+        var compared = WmlComparer.Compare(doc1, doc2, settings);
+
+        // Save output
+        var outputPath = Path.Combine(outputDir, "Issue96_BugRepro_Output.docx");
+        File.WriteAllBytes(outputPath, compared.DocumentByteArray);
+        Console.WriteLine($"📄 Output saved: {outputPath}\n");
+
+        // Analyze the output
+        Console.WriteLine("═══════════════════════════════════════════════════════════════");
+        Console.WriteLine(" VALIDATION RESULTS");
+        Console.WriteLine("═══════════════════════════════════════════════════════════════\n");
+
+        using var stream = new MemoryStream(compared.DocumentByteArray);
+        using var wDoc = WordprocessingDocument.Open(stream, false);
+        var mainXDoc = wDoc.MainDocumentPart.GetXDocument();
+
+        // Count elements
+        var moveFromCount = mainXDoc.Descendants(W + "moveFrom").Count();
+        var moveToCount = mainXDoc.Descendants(W + "moveTo").Count();
+        var moveFromRangeStartCount = mainXDoc.Descendants(W + "moveFromRangeStart").Count();
+        var moveFromRangeEndCount = mainXDoc.Descendants(W + "moveFromRangeEnd").Count();
+        var moveToRangeStartCount = mainXDoc.Descendants(W + "moveToRangeStart").Count();
+        var moveToRangeEndCount = mainXDoc.Descendants(W + "moveToRangeEnd").Count();
+        var delCount = mainXDoc.Descendants(W + "del").Count();
+        var insCount = mainXDoc.Descendants(W + "ins").Count();
+
+        Console.WriteLine("Move Elements:");
+        Console.WriteLine($" • w:moveFrom: {moveFromCount}");
+        Console.WriteLine($" • w:moveTo: {moveToCount}");
+        Console.WriteLine($" • w:moveFromRangeStart:{moveFromRangeStartCount}");
+        Console.WriteLine($" • w:moveFromRangeEnd: {moveFromRangeEndCount}");
+        Console.WriteLine($" • w:moveToRangeStart: {moveToRangeStartCount}");
+        Console.WriteLine($" • w:moveToRangeEnd: {moveToRangeEndCount}");
+        Console.WriteLine();
+        Console.WriteLine("Regular Revision Elements:");
+        Console.WriteLine($" • w:del: {delCount}");
+        Console.WriteLine($" • w:ins: {insCount}");
+        Console.WriteLine();
+
+        // Collect all revision IDs: id -> names of the elements that carry it
+        var allRevisionIds = new Dictionary<string, List<string>>();
+        var revisionElements = new[] { "ins", "del", "moveFrom", "moveTo", "rPrChange" };
+
+        foreach (var elemName in revisionElements)
+        {
+            foreach (var elem in mainXDoc.Descendants(W + elemName))
+            {
+                var id = elem.Attribute(W + "id")?.Value;
+                if (id != null)
+                {
+                    // Single lookup: create the bucket on first sight of this ID.
+                    if (!allRevisionIds.TryGetValue(id, out var users))
+                    {
+                        users = new List<string>();
+                        allRevisionIds[id] = users;
+                    }
+                    users.Add(elemName);
+                }
+            }
+        }
+
+        Console.WriteLine("ID Analysis:");
+        Console.WriteLine($" • Total unique IDs: {allRevisionIds.Count}");
+
+        // Check for the Issue #96 bug: duplicate IDs
+        var duplicates = allRevisionIds.Where(kvp => kvp.Value.Count > 1).ToList();
+        if (duplicates.Count > 0)
+        {
+            Console.WriteLine();
+            Console.WriteLine(" ❌ DUPLICATE IDs FOUND (Issue #96 BUG!):");
+            foreach (var dup in duplicates)
+            {
+                Console.WriteLine($" ID={dup.Key} used by: {string.Join(", ", dup.Value)}");
+            }
+        }
+        else
+        {
+            Console.WriteLine(" ✅ No duplicate IDs (Issue #96 is FIXED!)");
+        }
+
+        // Show move names
+        var moveNames = mainXDoc.Descendants(W + "moveFromRangeStart")
+            .Select(e => e.Attribute(W + "name")?.Value)
+            .Where(n => n != null)
+            .Distinct()
+            .ToList();
+
+        if (moveNames.Count > 0)
+        {
+            Console.WriteLine();
+            Console.WriteLine("Move Name Linking:");
+            foreach (var name in moveNames)
+            {
+                var fromCount = mainXDoc.Descendants(W + "moveFromRangeStart")
+                    .Count(e => e.Attribute(W + "name")?.Value == name);
+                var toCount = mainXDoc.Descendants(W + "moveToRangeStart")
+                    .Count(e => e.Attribute(W + "name")?.Value == name);
+                Console.WriteLine($" • '{name}': {fromCount} source(s), {toCount} destination(s)");
+            }
+        }
+
+        // Run OpenXML validation
+        Console.WriteLine();
+        Console.WriteLine("OpenXML Validation:");
+        var validator = new OpenXmlValidator(FileFormatVersions.Office2019);
+        var errors = validator.Validate(wDoc).ToList();
+        if (errors.Count == 0)
+        {
+            Console.WriteLine(" ✅ Document is valid per OpenXML schema");
+        }
+        else
+        {
+            Console.WriteLine($" ⚠️ {errors.Count} validation issue(s):");
+            foreach (var error in errors.Take(5))
+            {
+                Console.WriteLine($" - {error.Description}");
+            }
+        }
+
+        Console.WriteLine();
+        Console.WriteLine("═══════════════════════════════════════════════════════════════");
+        Console.WriteLine(" SUMMARY");
+        Console.WriteLine("═══════════════════════════════════════════════════════════════");
+
+        bool hasMoves = moveFromCount > 0 && moveToCount > 0;
+        bool hasDelIns = delCount > 0 || insCount > 0;
+        bool noDuplicates = duplicates.Count == 0;
+        bool validXml = errors.Count == 0;
+
+        Console.WriteLine($" Move detection working: {(hasMoves ? "✅ YES" : "⚠️ NO")}");
+        Console.WriteLine($" Has regular del/ins: {(hasDelIns ? "✅ YES" : "❌ NO")}");
+        Console.WriteLine($" No duplicate IDs: {(noDuplicates ? "✅ PASS" : "❌ FAIL")}");
+        Console.WriteLine($" OpenXML validation: {(validXml ? "✅ PASS" : "⚠️ ISSUES")}");
+        Console.WriteLine();
+
+        // Report the failure first: duplicates mean the bug is present no matter what else
+        // was (or was not) detected, so no success-flavoured message may be printed.
+        if (!noDuplicates)
+        {
+            Console.WriteLine("❌ Issue #96 is NOT fixed: duplicate revision IDs were found (see ID Analysis above).");
+            Environment.ExitCode = 1;
+        }
+        else if (hasMoves && hasDelIns)
+        {
+            Console.WriteLine("🎉 Issue #96 is FIXED! Document has both moves AND del/ins with unique IDs.");
+            Console.WriteLine($" Open the file in Word to verify: {outputPath}");
+        }
+        else if (!hasMoves)
+        {
+            Console.WriteLine("ℹ️ No moves were detected (similarity threshold not met).");
+            Console.WriteLine(" The ID uniqueness fix is still validated.");
+        }
+        else
+        {
+            Console.WriteLine("ℹ️ Moves were detected but no regular del/ins; the mixed scenario was not exercised.");
+        }
+    }
+
+    /// <summary>
+    /// Builds a minimal in-memory .docx whose body holds one single-run paragraph per input
+    /// string, plus a styles part (Calibri, font size value "22" as document defaults) and an
+    /// empty settings part.
+    /// </summary>
+    /// <param name="paragraphs">Paragraph texts, in document order.</param>
+    /// <returns>The document wrapped in a <see cref="WmlDocument"/> named "test.docx".</returns>
+    static WmlDocument CreateDocument(string[] paragraphs)
+    {
+        using var stream = new MemoryStream();
+        using (var doc = WordprocessingDocument.Create(stream, WordprocessingDocumentType.Document))
+        {
+            var mainPart = doc.AddMainDocumentPart();
+            mainPart.Document = new Document(
+                new Body(
+                    paragraphs.Select(text =>
+                        new Paragraph(
+                            new Run(
+                                new Text(text)
+                            )
+                        )
+                    )
+                )
+            );
+
+            var stylesPart = mainPart.AddNewPart<StyleDefinitionsPart>();
+            stylesPart.Styles = new Styles(
+                new DocDefaults(
+                    new RunPropertiesDefault(
+                        new RunPropertiesBaseStyle(
+                            new RunFonts { Ascii = "Calibri" },
+                            new FontSize { Val = "22" }
+                        )
+                    ),
+                    new ParagraphPropertiesDefault()
+                )
+            );
+
+            var settingsPart = mainPart.AddNewPart<DocumentSettingsPart>();
+            settingsPart.Settings = new Settings();
+
+            doc.Save();
+        }
+
+        // The package is disposed (flushed) above; ToArray copies the whole buffer regardless of
+        // the stream position, so no rewind is needed.
+        return new WmlDocument("test.docx", stream.ToArray());
+    }
diff --git a/TestFiles/Issue96/Issue96ValidationTest.cs b/TestFiles/Issue96/Issue96ValidationTest.cs
new file mode 100644
index 0000000..8fd82ef
--- /dev/null
+++ b/TestFiles/Issue96/Issue96ValidationTest.cs
@@ -0,0 +1,277 @@
+// Issue #96 Validation Test
+// This test creates documents with moves and validates that:
+// 1. Move detection works correctly
+// 2. All revision IDs are unique (the core Issue #96 fix)
+// 3. The output document can be opened without errors
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Xml.Linq;
+using DocumentFormat.OpenXml;
+using DocumentFormat.OpenXml.Packaging;
+using DocumentFormat.OpenXml.Wordprocessing;
+using DocumentFormat.OpenXml.Validation;
+using Docxodus;
+
+class Issue96ValidationTest
+{
+ static XNamespace W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
+
+    /// <summary>
+    /// Entry point: runs the three comparison scenarios in order and then reports the
+    /// directory the compared documents were written to.
+    /// </summary>
+    /// <param name="args">Command-line arguments (not used).</param>
+    static void Main(string[] args)
+    {
+        Console.WriteLine("=== Issue #96 Validation Test ===\n");
+
+        // Write next to the test assembly when its directory is known, otherwise to the working directory.
+        var assemblyDir = Path.GetDirectoryName(typeof(Issue96ValidationTest).Assembly.Location);
+        var outputDir = assemblyDir ?? Directory.GetCurrentDirectory();
+
+        var scenarios = new (string Banner, string Name, string[] Before, string[] After)[]
+        {
+            // Scenario 1: the first two paragraphs trade places.
+            (
+                "Test 1: Simple paragraph swap",
+                "SimpleSwap",
+                new[]
+                {
+                    "The quick brown fox jumps over the lazy dog.",
+                    "Pack my box with five dozen liquor jugs.",
+                    "How vexingly quick daft zebras jump."
+                },
+                new[]
+                {
+                    "Pack my box with five dozen liquor jugs.",
+                    "The quick brown fox jumps over the lazy dog.",
+                    "How vexingly quick daft zebras jump."
+                }
+            ),
+            // Scenario 2: a move combined with a modification, a deletion and an insertion
+            // (the Issue #96 scenario).
+            (
+                "\nTest 2: Move with additional ins/del changes",
+                "MoveWithChanges",
+                new[]
+                {
+                    "First paragraph that will be moved to the end.",
+                    "Second paragraph that stays but gets modified.",
+                    "Third paragraph that will be deleted entirely.",
+                    "Fourth paragraph that remains unchanged."
+                },
+                new[]
+                {
+                    "Second paragraph that was modified here today.",
+                    "Fourth paragraph that remains unchanged.",
+                    "First paragraph that will be moved to the end.",
+                    "Fifth paragraph that is completely new."
+                }
+            ),
+            // Scenario 3: the first and third paragraphs trade places.
+            (
+                "\nTest 3: Multiple content blocks",
+                "MultipleBlocks",
+                new[]
+                {
+                    "Alpha paragraph with enough words for detection.",
+                    "Beta paragraph with sufficient content here.",
+                    "Gamma paragraph stays in the same position.",
+                    "Delta paragraph with more words for testing."
+                },
+                new[]
+                {
+                    "Gamma paragraph stays in the same position.",
+                    "Beta paragraph with sufficient content here.",
+                    "Alpha paragraph with enough words for detection.",
+                    "Delta paragraph with more words for testing."
+                }
+            ),
+        };
+
+        foreach (var scenario in scenarios)
+        {
+            Console.WriteLine(scenario.Banner);
+            RunTest(scenario.Name, outputDir, scenario.Before, scenario.After);
+        }
+
+        Console.WriteLine("\n=== All Tests Complete ===");
+        Console.WriteLine($"Output files written to: {outputDir}");
+    }
+
+    /// <summary>
+    /// Runs one comparison scenario: builds both documents, compares them with native move
+    /// markup enabled, writes the result to disk and prints ID-uniqueness, element-count and
+    /// OpenXML validation findings.
+    /// </summary>
+    /// <remarks>
+    /// An ID collision or an exception sets <see cref="Environment.ExitCode"/> to 1 so that a
+    /// failing run is visible to scripts, not only in the console output. OpenXML validation
+    /// issues are reported as warnings and do not affect the exit code.
+    /// </remarks>
+    /// <param name="testName">Scenario name; also the prefix of the output file name.</param>
+    /// <param name="outputDir">Directory that receives "{testName}_Compared.docx".</param>
+    /// <param name="doc1Paragraphs">Paragraph texts of the original document.</param>
+    /// <param name="doc2Paragraphs">Paragraph texts of the revised document.</param>
+    static void RunTest(string testName, string outputDir, string[] doc1Paragraphs, string[] doc2Paragraphs)
+    {
+        try
+        {
+            // Create test documents
+            var doc1 = CreateDocument(doc1Paragraphs);
+            var doc2 = CreateDocument(doc2Paragraphs);
+
+            // Compare with move detection enabled
+            var settings = new WmlComparerSettings
+            {
+                DetectMoves = true,
+                SimplifyMoveMarkup = false, // Keep native move markup
+                MoveSimilarityThreshold = 0.8,
+                MoveMinimumWordCount = 3,
+                AuthorForRevisions = "Issue96Test"
+            };
+
+            var compared = WmlComparer.Compare(doc1, doc2, settings);
+
+            // Save the comparison document
+            var outputPath = Path.Combine(outputDir, $"{testName}_Compared.docx");
+            File.WriteAllBytes(outputPath, compared.DocumentByteArray);
+            Console.WriteLine($" ✓ Saved: {testName}_Compared.docx");
+
+            // Validate ID uniqueness
+            var (isValid, details) = ValidateRevisionIds(compared);
+            if (isValid)
+            {
+                Console.WriteLine($" ✓ All revision IDs are unique");
+            }
+            else
+            {
+                Console.WriteLine($" ✗ ID COLLISION DETECTED: {details}");
+                // A collision is the regression this test exists to catch: make the process fail.
+                Environment.ExitCode = 1;
+            }
+
+            // Check for move elements
+            var moveInfo = AnalyzeMoveElements(compared);
+            Console.WriteLine($" ✓ Move elements: {moveInfo.MoveFromCount} moveFrom, {moveInfo.MoveToCount} moveTo");
+            Console.WriteLine($" ✓ Del/Ins elements: {moveInfo.DelCount} del, {moveInfo.InsCount} ins");
+            Console.WriteLine($" ✓ Move names: {string.Join(", ", moveInfo.MoveNames)}");
+
+            // Run OpenXML validation
+            var validationErrors = ValidateDocument(compared);
+            if (validationErrors.Count == 0)
+            {
+                Console.WriteLine($" ✓ OpenXML validation passed");
+            }
+            else
+            {
+                Console.WriteLine($" ⚠ OpenXML validation: {validationErrors.Count} issues");
+                // Only the first three messages are printed to keep the report short.
+                foreach (var error in validationErrors.Take(3))
+                {
+                    Console.WriteLine($" - {error}");
+                }
+            }
+        }
+        catch (Exception ex)
+        {
+            // Keep running the remaining scenarios, but record the failure in the exit code.
+            Console.WriteLine($" ✗ ERROR: {ex.Message}");
+            Environment.ExitCode = 1;
+        }
+    }
+
+    /// <summary>
+    /// Creates a small .docx in memory: each input string becomes one paragraph with a single
+    /// run, and the package also gets a styles part (document defaults only) and an empty
+    /// settings part.
+    /// </summary>
+    /// <param name="paragraphs">Text of each paragraph, in the order it should appear.</param>
+    /// <returns>The generated package wrapped in a <see cref="WmlDocument"/> named "test.docx".</returns>
+    static WmlDocument CreateDocument(string[] paragraphs)
+    {
+        using var stream = new MemoryStream();
+        using (var doc = WordprocessingDocument.Create(stream, WordprocessingDocumentType.Document))
+        {
+            var mainPart = doc.AddMainDocumentPart();
+            mainPart.Document = new Document(
+                new Body(
+                    paragraphs.Select(text =>
+                        new Paragraph(
+                            new Run(
+                                new Text(text)
+                            )
+                        )
+                    )
+                )
+            );
+
+            // The generic argument selects which part type is created; without it the call does not compile.
+            var stylesPart = mainPart.AddNewPart<StyleDefinitionsPart>();
+            stylesPart.Styles = new Styles(
+                new DocDefaults(
+                    new RunPropertiesDefault(
+                        new RunPropertiesBaseStyle(
+                            new RunFonts { Ascii = "Calibri" },
+                            new FontSize { Val = "22" }
+                        )
+                    ),
+                    new ParagraphPropertiesDefault()
+                )
+            );
+
+            var settingsPart = mainPart.AddNewPart<DocumentSettingsPart>();
+            settingsPart.Settings = new Settings();
+
+            doc.Save();
+        }
+
+        // The package is closed before the bytes are copied out of the stream.
+        stream.Position = 0;
+        return new WmlDocument("test.docx", stream.ToArray());
+    }
+
+    /// <summary>
+    /// Checks that no two revision elements (w:ins, w:del, w:moveFrom, w:moveTo, w:rPrChange)
+    /// in the main document, footnotes or endnotes share the same w:id value.
+    /// </summary>
+    /// <remarks>
+    /// Move range markers (w:moveFromRangeStart and friends) are not collected, so no
+    /// exemption for them is needed when looking for duplicates.
+    /// </remarks>
+    /// <param name="doc">The compared document to inspect.</param>
+    /// <returns>
+    /// <c>IsValid</c> is true when every collected ID is unique; <c>Details</c> is then a summary
+    /// count, otherwise a "; "-separated list of each duplicated ID and the elements using it.
+    /// </returns>
+    static (bool IsValid, string Details) ValidateRevisionIds(WmlDocument doc)
+    {
+        using var stream = new MemoryStream(doc.DocumentByteArray);
+        using var wDoc = WordprocessingDocument.Open(stream, false);
+
+        var revisionElements = new[] { "ins", "del", "moveFrom", "moveTo", "rPrChange" };
+        var allIds = new List<(string Id, string ElementType, string Location)>();
+
+        // Records the w:id of every revision element found in one part.
+        void Collect(XDocument xDoc, string location)
+        {
+            foreach (var elemName in revisionElements)
+            {
+                foreach (var elem in xDoc.Descendants(W + elemName))
+                {
+                    var id = elem.Attribute(W + "id")?.Value;
+                    if (id != null)
+                    {
+                        allIds.Add((id, elemName, location));
+                    }
+                }
+            }
+        }
+
+        var mainPart = wDoc.MainDocumentPart;
+        Collect(mainPart.GetXDocument(), "MainDocument");
+
+        // Footnotes and endnotes are optional parts; scan them only when present.
+        if (mainPart.FootnotesPart != null)
+        {
+            Collect(mainPart.FootnotesPart.GetXDocument(), "Footnotes");
+        }
+
+        if (mainPart.EndnotesPart != null)
+        {
+            Collect(mainPart.EndnotesPart.GetXDocument(), "Endnotes");
+        }
+
+        var duplicates = allIds.GroupBy(x => x.Id)
+            .Where(g => g.Count() > 1)
+            .ToList();
+
+        if (duplicates.Count == 0)
+        {
+            return (true, $"All {allIds.Count} revision IDs are unique");
+        }
+
+        var details = string.Join("; ", duplicates.Select(g =>
+            $"ID={g.Key} used by: {string.Join(", ", g.Select(x => $"{x.ElementType}@{x.Location}"))}"));
+        return (false, details);
+    }
+
+    /// <summary>
+    /// Counts the w:moveFrom, w:moveTo, w:del and w:ins elements in the main document part and
+    /// lists the distinct w:name values found on w:moveFromRangeStart elements.
+    /// </summary>
+    /// <param name="doc">The compared document to inspect.</param>
+    /// <returns>The four element counts and the distinct move names.</returns>
+    static (int MoveFromCount, int MoveToCount, int DelCount, int InsCount, List<string> MoveNames)
+        AnalyzeMoveElements(WmlDocument doc)
+    {
+        using var stream = new MemoryStream(doc.DocumentByteArray);
+        using var wDoc = WordprocessingDocument.Open(stream, false);
+
+        var mainXDoc = wDoc.MainDocumentPart.GetXDocument();
+
+        var moveFromCount = mainXDoc.Descendants(W + "moveFrom").Count();
+        var moveToCount = mainXDoc.Descendants(W + "moveTo").Count();
+        var delCount = mainXDoc.Descendants(W + "del").Count();
+        var insCount = mainXDoc.Descendants(W + "ins").Count();
+
+        // Range-start markers without a w:name attribute are ignored.
+        var moveNames = mainXDoc.Descendants(W + "moveFromRangeStart")
+            .Select(e => e.Attribute(W + "name")?.Value)
+            .Where(n => n != null)
+            .Distinct()
+            .ToList();
+
+        return (moveFromCount, moveToCount, delCount, insCount, moveNames);
+    }
+
+    /// <summary>
+    /// Runs the Open XML SDK validator (Office 2019 rules) over the document and returns the
+    /// findings formatted as "ErrorType: Description".
+    /// </summary>
+    /// <param name="doc">The document to validate.</param>
+    /// <returns>At most the first ten validation messages; empty when the validator reports nothing.</returns>
+    static List<string> ValidateDocument(WmlDocument doc)
+    {
+        using var stream = new MemoryStream(doc.DocumentByteArray);
+        using var wDoc = WordprocessingDocument.Open(stream, false);
+
+        var validator = new OpenXmlValidator(FileFormatVersions.Office2019);
+        return validator.Validate(wDoc)
+            .Select(e => $"{e.ErrorType}: {e.Description}")
+            .Take(10) // cap the report; callers see a count of at most ten
+            .ToList();
+    }
+}
diff --git a/TestFiles/Issue96/Issue96ValidationTest.csproj b/TestFiles/Issue96/Issue96ValidationTest.csproj
new file mode 100644
index 0000000..4a097fc
--- /dev/null
+++ b/TestFiles/Issue96/Issue96ValidationTest.csproj
@@ -0,0 +1,16 @@
+
+
+
+ Exe
+ net8.0
+ disable
+ disable
+ SA1636
+ StressTest
+
+
+
+
+
+
+
diff --git a/TestFiles/Issue96/ShowMoveXml.cs b/TestFiles/Issue96/ShowMoveXml.cs
new file mode 100644
index 0000000..9d4264b
--- /dev/null
+++ b/TestFiles/Issue96/ShowMoveXml.cs
@@ -0,0 +1,157 @@
+// Simple test to show move markup is working correctly
+using System;
+using System.IO;
+using System.Linq;
+using System.Xml.Linq;
+using DocumentFormat.OpenXml;
+using DocumentFormat.OpenXml.Packaging;
+using DocumentFormat.OpenXml.Wordprocessing;
+using Docxodus;
+
+class ShowMoveXml
+{
+    /// <summary>
+    /// Demo entry point: compares two three-paragraph documents in which paragraph A has moved
+    /// after paragraph B, prints the move and del/ins markup found in the result together with
+    /// every revision ID, reports duplicate IDs, and saves the compared document to the
+    /// working directory.
+    /// </summary>
+    static void Main()
+    {
+        Console.WriteLine("Creating test documents...\n");
+
+        const string paragraphA = "This is paragraph A with enough words for move detection.";
+        const string paragraphB = "This is paragraph B with sufficient content here.";
+        const string paragraphC = "This is paragraph C with more words added.";
+
+        // Original order is A, B, C; the revised order is B, A, C.
+        var doc1 = CreateDoc(paragraphA, paragraphB, paragraphC);
+        var doc2 = CreateDoc(paragraphB, paragraphA, paragraphC);
+
+        Console.WriteLine("Doc1: [A] [B] [C]");
+        Console.WriteLine("Doc2: [B] [A] [C] (A moved after B)\n");
+
+        var settings = new WmlComparerSettings
+        {
+            DetectMoves = true,
+            SimplifyMoveMarkup = false,
+            MoveSimilarityThreshold = 0.8,
+            MoveMinimumWordCount = 3
+        };
+
+        var compared = WmlComparer.Compare(doc1, doc2, settings);
+
+        // Re-open the result and load the body into LINQ to XML for querying.
+        using var stream = new MemoryStream(compared.DocumentByteArray);
+        using var wDoc = WordprocessingDocument.Open(stream, false);
+
+        var bodyXml = XElement.Parse(wDoc.MainDocumentPart.Document.Body.OuterXml);
+
+        XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
+
+        // Prints the count of the named range-start markers, then each marker's id and name.
+        void PrintRangeStarts(string heading, string localName)
+        {
+            var starts = bodyXml.Descendants(w + localName).ToList();
+            Console.WriteLine($"{heading}: {starts.Count}");
+            foreach (var start in starts)
+            {
+                Console.WriteLine($" id={start.Attribute(w + "id")?.Value}, name={start.Attribute(w + "name")?.Value}");
+            }
+        }
+
+        // Prints the count of the named move elements, then each element's id and (truncated) text.
+        void PrintMovedContent(string heading, string localName)
+        {
+            var moved = bodyXml.Descendants(w + localName).ToList();
+            Console.WriteLine($"{heading}: {moved.Count}");
+            foreach (var element in moved)
+            {
+                var text = string.Join("", element.Descendants(w + "t").Select(t => t.Value));
+                Console.WriteLine($" id={element.Attribute(w + "id")?.Value}, text=\"{Truncate(text, 50)}\"");
+            }
+        }
+
+        Console.WriteLine("=== MOVE ELEMENTS FOUND ===\n");
+
+        PrintRangeStarts("moveFromRangeStart", "moveFromRangeStart");
+        PrintMovedContent("\nmoveFrom", "moveFrom");
+        PrintRangeStarts("\nmoveToRangeStart", "moveToRangeStart");
+        PrintMovedContent("\nmoveTo", "moveTo");
+
+        Console.WriteLine("\n=== DEL/INS ELEMENTS ===\n");
+
+        int delCount = bodyXml.Descendants(w + "del").Count();
+        int insCount = bodyXml.Descendants(w + "ins").Count();
+        Console.WriteLine($"del: {delCount}");
+        Console.WriteLine($"ins: {insCount}");
+
+        Console.WriteLine("\n=== ALL REVISION IDs ===\n");
+
+        // Every del/ins/moveFrom/moveTo element that carries a w:id, sorted by numeric id.
+        var allIds = (from name in new[] { "del", "ins", "moveFrom", "moveTo" }
+                      from element in bodyXml.Descendants(w + name)
+                      let id = element.Attribute(w + "id")?.Value
+                      where id != null
+                      orderby int.Parse(id)
+                      select (Type: name, Id: id)).ToList();
+
+        foreach (var entry in allIds)
+        {
+            Console.WriteLine($" {entry.Type,-10} id={entry.Id}");
+        }
+
+        // Any id used by more than one element is a collision.
+        var duplicates = allIds.GroupBy(entry => entry.Id)
+            .Where(group => group.Count() > 1)
+            .ToList();
+        Console.WriteLine();
+        if (duplicates.Count == 0)
+        {
+            Console.WriteLine("✅ All IDs are unique - Issue #96 is FIXED!");
+        }
+        else
+        {
+            Console.WriteLine("❌ DUPLICATE IDs FOUND (BUG!):");
+            foreach (var group in duplicates)
+            {
+                Console.WriteLine($" ID {group.Key}: {string.Join(", ", group.Select(entry => entry.Type))}");
+            }
+        }
+
+        // Save output file
+        var outputPath = "Issue96_MoveDemo.docx";
+        File.WriteAllBytes(outputPath, compared.DocumentByteArray);
+        Console.WriteLine($"\n📄 Output saved: {Path.GetFullPath(outputPath)}");
+    }
+
+    /// <summary>
+    /// Builds an in-memory .docx whose body has one single-run paragraph per argument, with a
+    /// styles part (document defaults only) and an empty settings part.
+    /// </summary>
+    /// <param name="paragraphs">Paragraph texts, in document order.</param>
+    /// <returns>A <see cref="WmlDocument"/> named "test.docx" holding the package bytes.</returns>
+    static WmlDocument CreateDoc(params string[] paragraphs)
+    {
+        using var stream = new MemoryStream();
+        using (var doc = WordprocessingDocument.Create(stream, WordprocessingDocumentType.Document))
+        {
+            var mainPart = doc.AddMainDocumentPart();
+            mainPart.Document = new Document(new Body(
+                paragraphs.Select(t => new Paragraph(new Run(new Text(t))))));
+
+            // AddNewPart needs its part type spelled out as a generic argument to compile.
+            var stylesPart = mainPart.AddNewPart<StyleDefinitionsPart>();
+            stylesPart.Styles = new Styles(new DocDefaults(
+                new RunPropertiesDefault(new RunPropertiesBaseStyle(
+                    new RunFonts { Ascii = "Calibri" }, new FontSize { Val = "22" })),
+                new ParagraphPropertiesDefault()));
+
+            var settingsPart = mainPart.AddNewPart<DocumentSettingsPart>();
+            settingsPart.Settings = new Settings();
+            doc.Save();
+        }
+        // The package is closed before the bytes are copied out of the stream.
+        stream.Position = 0;
+        return new WmlDocument("test.docx", stream.ToArray());
+    }
+
+    /// <summary>
+    /// Returns <paramref name="s"/> unchanged when it has at most <paramref name="max"/>
+    /// characters; otherwise returns its first <paramref name="max"/> characters followed by "...".
+    /// </summary>
+    static string Truncate(string s, int max)
+    {
+        if (s.Length > max)
+        {
+            return s.Substring(0, max) + "...";
+        }
+
+        return s;
+    }
+}
diff --git a/TestFiles/Issue96/StressTest.cs b/TestFiles/Issue96/StressTest.cs
new file mode 100644
index 0000000..fa84347
--- /dev/null
+++ b/TestFiles/Issue96/StressTest.cs
@@ -0,0 +1,359 @@
+// Issue #96 Stress Test
+// Creates complex documents with dozens of moves and hundreds of changes
+// to thoroughly validate the ID uniqueness fix
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Xml.Linq;
+using DocumentFormat.OpenXml;
+using DocumentFormat.OpenXml.Packaging;
+using DocumentFormat.OpenXml.Wordprocessing;
+using DocumentFormat.OpenXml.Validation;
+using Docxodus;
+
+class StressTest
+{
+ static readonly Random Rng = new Random(42); // Fixed seed for reproducibility
+ static XNamespace W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
+
+    /// <summary>
+    /// Entry point: prints the banner, then runs the three stress scenarios in increasing size
+    /// (50, 100 and 200 paragraphs) and prints a closing banner.
+    /// </summary>
+    static void Main()
+    {
+        const string rule = "═══════════════════════════════════════════════════════════════════════";
+
+        Console.WriteLine("╔════════════════════════════════════════════════════════════════════╗");
+        Console.WriteLine("║ ISSUE #96 STRESS TEST ║");
+        Console.WriteLine("║ Dozens of moves + Hundreds of changes ║");
+        Console.WriteLine("╚════════════════════════════════════════════════════════════════════╝\n");
+
+        var scenarios = new (string Title, string Name, int Paragraphs, int Moves, int Changes)[]
+        {
+            // Large document with many moves
+            ("TEST 1: 50 paragraphs, ~15 moves, ~30 other changes", "StressTest1", 50, 15, 30),
+            // Even larger with more chaos
+            ("TEST 2: 100 paragraphs, ~25 moves, ~50 other changes", "StressTest2", 100, 25, 50),
+            // Maximum chaos
+            ("TEST 3: 200 paragraphs, ~40 moves, ~100 other changes", "StressTest3", 200, 40, 100),
+        };
+
+        for (int i = 0; i < scenarios.Length; i++)
+        {
+            var scenario = scenarios[i];
+
+            // Every scenario header after the first is preceded by a blank line.
+            Console.WriteLine(i == 0 ? rule : "\n" + rule);
+            Console.WriteLine(scenario.Title);
+            Console.WriteLine(rule + "\n");
+            RunStressTest(scenario.Name, scenario.Paragraphs, scenario.Moves, scenario.Changes);
+        }
+
+        Console.WriteLine("\n════════════════════════════════════════════════════════════════════════");
+        Console.WriteLine(" ALL STRESS TESTS COMPLETE");
+        Console.WriteLine("════════════════════════════════════════════════════════════════════════\n");
+    }
+
+    /// <summary>
+    /// Runs one stress scenario: generates <paramref name="paragraphCount"/> paragraphs, derives
+    /// a modified copy by applying random moves, deletions, insertions and word replacements,
+    /// compares the two documents with move detection enabled, saves the result to the working
+    /// directory and prints element counts, revision-ID uniqueness, move-name pairing and
+    /// OpenXML validation findings.
+    /// </summary>
+    /// <remarks>
+    /// All random choices made here, including the reference token in inserted paragraphs, come
+    /// from the shared seeded <c>Rng</c>. The scenario passes when no revision ID is duplicated;
+    /// a failure also sets <see cref="Environment.ExitCode"/> to 1.
+    /// </remarks>
+    /// <param name="name">Scenario name; also the prefix of the output file name.</param>
+    /// <param name="paragraphCount">Number of paragraphs in the original document.</param>
+    /// <param name="moveCount">Upper bound on the number of paragraph moves to apply.</param>
+    /// <param name="changeCount">Budget split evenly (integer division by 3) between deletions, insertions and modifications.</param>
+    static void RunStressTest(string name, int paragraphCount, int moveCount, int changeCount)
+    {
+        Console.WriteLine($"Generating {paragraphCount} paragraphs...");
+
+        // Generate original document with numbered paragraphs containing unique content
+        var originalParagraphs = Enumerable.Range(1, paragraphCount)
+            .Select(i => GenerateParagraph(i))
+            .ToList();
+
+        // Create modified version with moves and changes
+        var modifiedParagraphs = new List<string>(originalParagraphs);
+
+        // Bookkeeping of the edits applied (kept for debugging; not printed)
+        var moves = new List<(int from, int to, string text)>();
+        var deletions = new List<int>();
+        var insertions = new List<int>();
+        var modifications = new List<int>();
+
+        // Apply moves: pick random paragraphs and move them to new positions
+        Console.WriteLine($"Applying ~{moveCount} moves...");
+        var availableForMove = Enumerable.Range(0, modifiedParagraphs.Count).ToList();
+        for (int i = 0; i < moveCount && availableForMove.Count > 2; i++)
+        {
+            int fromIdx = availableForMove[Rng.Next(availableForMove.Count)];
+            availableForMove.Remove(fromIdx);
+
+            var para = modifiedParagraphs[fromIdx];
+            modifiedParagraphs.RemoveAt(fromIdx);
+
+            // Adjust available indices after removal
+            availableForMove = availableForMove.Select(x => x > fromIdx ? x - 1 : x).ToList();
+
+            int toIdx = Rng.Next(modifiedParagraphs.Count + 1);
+            modifiedParagraphs.Insert(toIdx, para);
+
+            // Adjust available indices after insertion
+            availableForMove = availableForMove.Select(x => x >= toIdx ? x + 1 : x).ToList();
+
+            moves.Add((fromIdx, toIdx, para.Substring(0, Math.Min(40, para.Length))));
+        }
+
+        // Apply deletions (never shrinking the document to half its original size or less)
+        int deleteCount = changeCount / 3;
+        Console.WriteLine($"Applying ~{deleteCount} deletions...");
+        for (int i = 0; i < deleteCount && modifiedParagraphs.Count > paragraphCount / 2; i++)
+        {
+            int idx = Rng.Next(modifiedParagraphs.Count);
+            deletions.Add(idx);
+            modifiedParagraphs.RemoveAt(idx);
+        }
+
+        // Apply insertions
+        int insertCount = changeCount / 3;
+        Console.WriteLine($"Applying ~{insertCount} insertions...");
+        for (int i = 0; i < insertCount; i++)
+        {
+            int idx = Rng.Next(modifiedParagraphs.Count + 1);
+            insertions.Add(idx);
+            // The reference token comes from the seeded generator so the inserted text is
+            // the same on every run.
+            modifiedParagraphs.Insert(idx, $"[NEW-{i + 1}] This is a newly inserted paragraph with enough words to be meaningful. " +
+                $"It contains various content including technical terms, legal jargon, and general prose. " +
+                $"The purpose is to test the comparison engine with substantial insertions. Reference: INS-{Rng.Next():X8}");
+        }
+
+        // Apply modifications (change words in existing paragraphs)
+        int modifyCount = changeCount / 3;
+        Console.WriteLine($"Applying ~{modifyCount} modifications...");
+        for (int i = 0; i < modifyCount && modifiedParagraphs.Count > 0; i++)
+        {
+            int idx = Rng.Next(modifiedParagraphs.Count);
+            var para = modifiedParagraphs[idx];
+            // Modify by replacing some words
+            para = para.Replace("paragraph", "section")
+                .Replace("content", "material")
+                .Replace("document", "file");
+            // A paragraph picked more than once is tagged only the first time.
+            if (!para.Contains("[MODIFIED]"))
+            {
+                para = "[MODIFIED] " + para;
+            }
+            modifiedParagraphs[idx] = para;
+            modifications.Add(idx);
+        }
+
+        Console.WriteLine($"\nCreating documents...");
+        Console.WriteLine($" Original: {originalParagraphs.Count} paragraphs");
+        Console.WriteLine($" Modified: {modifiedParagraphs.Count} paragraphs");
+
+        var doc1 = CreateDocument(originalParagraphs);
+        var doc2 = CreateDocument(modifiedParagraphs);
+
+        Console.WriteLine($"\nComparing with DetectMoves=true...");
+
+        var settings = new WmlComparerSettings
+        {
+            DetectMoves = true,
+            SimplifyMoveMarkup = false,
+            MoveSimilarityThreshold = 0.75, // Slightly lower threshold to catch more moves
+            MoveMinimumWordCount = 5,
+            AuthorForRevisions = "StressTest"
+        };
+
+        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
+        var compared = WmlComparer.Compare(doc1, doc2, settings);
+        stopwatch.Stop();
+
+        Console.WriteLine($" Comparison completed in {stopwatch.ElapsedMilliseconds}ms");
+
+        // Save output
+        var outputPath = $"{name}_Output.docx";
+        File.WriteAllBytes(outputPath, compared.DocumentByteArray);
+        Console.WriteLine($"\n📄 Output: {Path.GetFullPath(outputPath)}");
+
+        // Analyze results
+        Console.WriteLine($"\n--- ANALYSIS ---\n");
+
+        using var stream = new MemoryStream(compared.DocumentByteArray);
+        using var wDoc = WordprocessingDocument.Open(stream, false);
+        var mainXDoc = wDoc.MainDocumentPart.GetXDocument();
+
+        // Count elements
+        var stats = new Dictionary<string, int>
+        {
+            ["moveFrom"] = mainXDoc.Descendants(W + "moveFrom").Count(),
+            ["moveTo"] = mainXDoc.Descendants(W + "moveTo").Count(),
+            ["moveFromRangeStart"] = mainXDoc.Descendants(W + "moveFromRangeStart").Count(),
+            ["moveFromRangeEnd"] = mainXDoc.Descendants(W + "moveFromRangeEnd").Count(),
+            ["moveToRangeStart"] = mainXDoc.Descendants(W + "moveToRangeStart").Count(),
+            ["moveToRangeEnd"] = mainXDoc.Descendants(W + "moveToRangeEnd").Count(),
+            ["del"] = mainXDoc.Descendants(W + "del").Count(),
+            ["ins"] = mainXDoc.Descendants(W + "ins").Count(),
+            ["rPrChange"] = mainXDoc.Descendants(W + "rPrChange").Count(),
+        };
+
+        Console.WriteLine("Element Counts:");
+        Console.WriteLine($" Move elements:");
+        Console.WriteLine($" moveFrom: {stats["moveFrom"],4}");
+        Console.WriteLine($" moveTo: {stats["moveTo"],4}");
+        Console.WriteLine($" moveFromRangeStart:{stats["moveFromRangeStart"],4}");
+        Console.WriteLine($" moveToRangeStart: {stats["moveToRangeStart"],4}");
+        Console.WriteLine($" Revision elements:");
+        Console.WriteLine($" del: {stats["del"],4}");
+        Console.WriteLine($" ins: {stats["ins"],4}");
+        Console.WriteLine($" rPrChange: {stats["rPrChange"],4}");
+
+        // Collect all revision IDs
+        var revisionElements = new[] { "ins", "del", "moveFrom", "moveTo", "rPrChange" };
+        var allIds = new List<(string Id, string Type)>();
+
+        foreach (var elemName in revisionElements)
+        {
+            foreach (var elem in mainXDoc.Descendants(W + elemName))
+            {
+                var id = elem.Attribute(W + "id")?.Value;
+                if (id != null)
+                {
+                    allIds.Add((id, elemName));
+                }
+            }
+        }
+
+        Console.WriteLine($"\n Total revision elements with IDs: {allIds.Count}");
+
+        // Check for duplicates - THE CRITICAL TEST
+        var duplicates = allIds.GroupBy(x => x.Id)
+            .Where(g => g.Count() > 1)
+            .ToList();
+
+        if (duplicates.Count > 0)
+        {
+            Console.WriteLine($"\n ❌ DUPLICATE IDs FOUND - ISSUE #96 BUG!");
+            foreach (var dup in duplicates.Take(10))
+            {
+                Console.WriteLine($" ID={dup.Key}: {string.Join(", ", dup.Select(x => x.Type))}");
+            }
+            if (duplicates.Count > 10)
+            {
+                Console.WriteLine($" ... and {duplicates.Count - 10} more duplicates");
+            }
+        }
+        else
+        {
+            Console.WriteLine($"\n ✅ All {allIds.Count} revision IDs are UNIQUE!");
+        }
+
+        // Check move name pairing
+        var moveFromNames = mainXDoc.Descendants(W + "moveFromRangeStart")
+            .Select(e => e.Attribute(W + "name")?.Value)
+            .Where(n => !string.IsNullOrEmpty(n))
+            .ToList();
+
+        var moveToNames = mainXDoc.Descendants(W + "moveToRangeStart")
+            .Select(e => e.Attribute(W + "name")?.Value)
+            .Where(n => !string.IsNullOrEmpty(n))
+            .ToList();
+
+        // Union already removes duplicates, so no extra Distinct is needed.
+        var uniqueMoveNames = moveFromNames.Union(moveToNames).ToList();
+        Console.WriteLine($"\n Move names: {uniqueMoveNames.Count} unique ({string.Join(", ", uniqueMoveNames.Take(10))}{(uniqueMoveNames.Count > 10 ? "..." : "")})");
+
+        // Validate all names are paired
+        var unpairedFrom = moveFromNames.Except(moveToNames).ToList();
+        var unpairedTo = moveToNames.Except(moveFromNames).ToList();
+
+        if (unpairedFrom.Any() || unpairedTo.Any())
+        {
+            Console.WriteLine($" ⚠️ Unpaired move names found:");
+            if (unpairedFrom.Any()) Console.WriteLine($" From without To: {string.Join(", ", unpairedFrom)}");
+            if (unpairedTo.Any()) Console.WriteLine($" To without From: {string.Join(", ", unpairedTo)}");
+        }
+        else if (uniqueMoveNames.Count > 0)
+        {
+            Console.WriteLine($" ✅ All move names properly paired!");
+        }
+
+        // OpenXML validation
+        var validator = new OpenXmlValidator(FileFormatVersions.Office2019);
+        var errors = validator.Validate(wDoc).ToList();
+
+        if (errors.Count == 0)
+        {
+            Console.WriteLine($"\n ✅ OpenXML validation PASSED");
+        }
+        else
+        {
+            Console.WriteLine($"\n ⚠️ OpenXML validation: {errors.Count} issues");
+            foreach (var error in errors.Take(5))
+            {
+                // Show at most 80 characters; the ellipsis marks descriptions that were cut.
+                var description = error.Description.Length > 80
+                    ? error.Description.Substring(0, 80) + "..."
+                    : error.Description;
+                Console.WriteLine($" {error.ErrorType}: {description}");
+            }
+        }
+
+        // Final verdict: only duplicate IDs fail the scenario; validation issues are warnings.
+        Console.WriteLine($"\n--- VERDICT ---");
+        bool passed = duplicates.Count == 0;
+        if (passed)
+        {
+            Console.WriteLine($" 🎉 {name} PASSED - No duplicate IDs with {stats["moveFrom"]} moves and {stats["del"] + stats["ins"]} del/ins");
+        }
+        else
+        {
+            Console.WriteLine($" 💥 {name} FAILED - Found {duplicates.Count} duplicate IDs");
+            // Surface the failure to callers/scripts, not only on the console.
+            Environment.ExitCode = 1;
+        }
+    }
+
+    /// <summary>
+    /// Produces the text of paragraph number <paramref name="index"/>: one of fifteen sentence
+    /// templates (chosen by <c>index % 15</c>) filled with the index and a reference token of
+    /// the form "DOC-{index:D4}-XXXXXXXX".
+    /// </summary>
+    /// <remarks>
+    /// The eight-hex-digit suffix is drawn from the shared seeded <c>Rng</c> rather than a GUID,
+    /// so repeated runs generate the same paragraphs. The zero-padded index in the token keeps
+    /// each paragraph's token distinct for different indices.
+    /// </remarks>
+    /// <param name="index">Paragraph number; callers in this file pass values starting at 1.</param>
+    /// <returns>The formatted paragraph text.</returns>
+    static string GenerateParagraph(int index)
+    {
+        var templates = new[]
+        {
+            "Paragraph {0}: This document section contains important information about the project requirements and specifications. Reference ID: {1}",
+            "Section {0}: The following content describes the technical implementation details for the proposed system architecture. Doc: {1}",
+            "Item {0}: According to the agreement dated herein, the parties shall comply with all terms and conditions specified. Contract: {1}",
+            "Clause {0}: The licensee agrees to use the software only for purposes permitted under this license agreement. License: {1}",
+            "Article {0}: This paragraph establishes the fundamental principles governing the relationship between the entities. Ref: {1}",
+            "Point {0}: The data processing activities shall be conducted in accordance with applicable privacy regulations. GDPR: {1}",
+            "Note {0}: All modifications to this document must be tracked and approved by the designated review committee. Rev: {1}",
+            "Entry {0}: The financial statements have been prepared in accordance with generally accepted accounting principles. GAAP: {1}",
+            "Record {0}: This memorandum summarizes the key decisions made during the executive committee meeting. Minutes: {1}",
+            "Statement {0}: The undersigned hereby certifies that all information provided is true and accurate. Cert: {1}",
+            "Provision {0}: Notwithstanding the foregoing, the obligations set forth herein shall survive termination. Legal: {1}",
+            "Stipulation {0}: The contractor shall deliver all work products by the specified deadline. Deadline: {1}",
+            "Requirement {0}: The system shall support concurrent users and maintain response times under load. Perf: {1}",
+            "Specification {0}: All API endpoints must implement proper authentication and authorization. Security: {1}",
+            "Definition {0}: For purposes of this agreement, the following terms shall have the meanings ascribed. Terms: {1}",
+        };
+
+        var template = templates[index % templates.Length];
+        // Rng.Next() is non-negative, so "X8" always yields exactly eight upper-case hex digits.
+        return string.Format(template, index, $"DOC-{index:D4}-{Rng.Next():X8}");
+    }
+
+    /// <summary>
+    /// Builds an in-memory .docx from the given paragraph texts: one single-run paragraph per
+    /// entry, plus a styles part (document defaults only) and an empty settings part.
+    /// </summary>
+    /// <param name="paragraphs">Paragraph texts, in document order.</param>
+    /// <returns>A <see cref="WmlDocument"/> named "test.docx" wrapping the generated package bytes.</returns>
+    static WmlDocument CreateDocument(List<string> paragraphs)
+    {
+        using var stream = new MemoryStream();
+        using (var doc = WordprocessingDocument.Create(stream, WordprocessingDocumentType.Document))
+        {
+            var mainPart = doc.AddMainDocumentPart();
+            mainPart.Document = new Document(
+                new Body(
+                    paragraphs.Select(text =>
+                        new Paragraph(
+                            new Run(
+                                new Text(text)
+                            )
+                        )
+                    )
+                )
+            );
+
+            // AddNewPart is generic; the part type must be given explicitly for the call to compile.
+            var stylesPart = mainPart.AddNewPart<StyleDefinitionsPart>();
+            stylesPart.Styles = new Styles(
+                new DocDefaults(
+                    new RunPropertiesDefault(
+                        new RunPropertiesBaseStyle(
+                            new RunFonts { Ascii = "Calibri" },
+                            new FontSize { Val = "22" }
+                        )
+                    ),
+                    new ParagraphPropertiesDefault()
+                )
+            );
+
+            var settingsPart = mainPart.AddNewPart<DocumentSettingsPart>();
+            settingsPart.Settings = new Settings();
+
+            doc.Save();
+        }
+
+        // The package is closed before the bytes are copied out of the stream.
+        stream.Position = 0;
+        return new WmlDocument("test.docx", stream.ToArray());
+    }
+}