diff --git a/src/Cli/test/LangChain.Cli.IntegrationTests/LangChain.Cli.IntegrationTests.csproj b/src/Cli/test/LangChain.Cli.IntegrationTests/LangChain.Cli.IntegrationTests.csproj
index dc237dbd..6f1943f4 100644
--- a/src/Cli/test/LangChain.Cli.IntegrationTests/LangChain.Cli.IntegrationTests.csproj
+++ b/src/Cli/test/LangChain.Cli.IntegrationTests/LangChain.Cli.IntegrationTests.csproj
@@ -7,6 +7,10 @@
enable
+
+
+
+
diff --git a/src/Core/test/UnitTests/LangChain.Core.UnitTests.csproj b/src/Core/test/UnitTests/LangChain.Core.UnitTests.csproj
index 35422a2e..2942cba7 100644
--- a/src/Core/test/UnitTests/LangChain.Core.UnitTests.csproj
+++ b/src/Core/test/UnitTests/LangChain.Core.UnitTests.csproj
@@ -4,6 +4,10 @@
net9.0
+
+
+
+
diff --git a/src/Directory.Packages.props b/src/Directory.Packages.props
index 9358d150..1ab22ceb 100644
--- a/src/Directory.Packages.props
+++ b/src/Directory.Packages.props
@@ -9,6 +9,7 @@
+
all
diff --git a/src/DocumentLoaders/IntegrationTests/LangChain.DocumentLoaders.IntegrationTests.csproj b/src/DocumentLoaders/IntegrationTests/LangChain.DocumentLoaders.IntegrationTests.csproj
index d5e6c8c7..cacde49e 100644
--- a/src/DocumentLoaders/IntegrationTests/LangChain.DocumentLoaders.IntegrationTests.csproj
+++ b/src/DocumentLoaders/IntegrationTests/LangChain.DocumentLoaders.IntegrationTests.csproj
@@ -6,6 +6,10 @@
$(NoWarn)
+
+
+
+
diff --git a/src/Meta/test/LangChain.IntegrationTests.csproj b/src/Meta/test/LangChain.IntegrationTests.csproj
index 09d189ce..838985e9 100644
--- a/src/Meta/test/LangChain.IntegrationTests.csproj
+++ b/src/Meta/test/LangChain.IntegrationTests.csproj
@@ -6,6 +6,7 @@
+
diff --git a/src/Splitters/Abstractions/src/Text/CharacterTextSplitter.cs b/src/Splitters/Abstractions/src/Text/CharacterTextSplitter.cs
index b53d958d..ec9e5b7d 100644
--- a/src/Splitters/Abstractions/src/Text/CharacterTextSplitter.cs
+++ b/src/Splitters/Abstractions/src/Text/CharacterTextSplitter.cs
@@ -15,6 +15,8 @@ public override IReadOnlyList SplitText(string text)
{
text = text ?? throw new ArgumentNullException(nameof(text));
+ text = text.Replace("\r", ""); // some people are using windows
+
List splits;
if (separator != null)
{
diff --git a/src/Splitters/Abstractions/src/Text/MarkdownHeaderTextSplitter.cs b/src/Splitters/Abstractions/src/Text/MarkdownHeaderTextSplitter.cs
index 18a4b706..67390e2d 100644
--- a/src/Splitters/Abstractions/src/Text/MarkdownHeaderTextSplitter.cs
+++ b/src/Splitters/Abstractions/src/Text/MarkdownHeaderTextSplitter.cs
@@ -76,7 +76,7 @@ public override IReadOnlyList SplitText(string text)
{
var existingHeader = currentHeader.Split('|');
- string prevHeader = string.Join("|", existingHeader.Take(existingHeader.Length - 1));
+ string prevHeader = string.Join("|", existingHeader.Take(existingHeader.Length - (1 - hLen + currentHeaderLen)));
currentHeader = prevHeader + "|" + strippedLine.TrimStart('#').Trim();
currentHeaderLen = hLen;
continue;
diff --git a/src/Splitters/Abstractions/src/Text/RecursiveCharacterTextSplitter.cs b/src/Splitters/Abstractions/src/Text/RecursiveCharacterTextSplitter.cs
index 4e96e87f..85fcb75f 100644
--- a/src/Splitters/Abstractions/src/Text/RecursiveCharacterTextSplitter.cs
+++ b/src/Splitters/Abstractions/src/Text/RecursiveCharacterTextSplitter.cs
@@ -19,6 +19,8 @@ public override IReadOnlyList SplitText(string text)
{
text = text ?? throw new ArgumentNullException(nameof(text));
+ text = text.Replace("\r", ""); // some people are using windows
+
List finalChunks = new List();
string separator = _separators[_separators.Count - 1];
diff --git a/src/Splitters/Abstractions/test/LangChain.Splitters.Abstractions.Tests.csproj b/src/Splitters/Abstractions/test/LangChain.Splitters.Abstractions.Tests.csproj
index 208c95a9..b351aa60 100644
--- a/src/Splitters/Abstractions/test/LangChain.Splitters.Abstractions.Tests.csproj
+++ b/src/Splitters/Abstractions/test/LangChain.Splitters.Abstractions.Tests.csproj
@@ -4,6 +4,10 @@
net9.0
+
+
+
+
diff --git a/src/Splitters/Abstractions/test/Resources/markdown_test_material.md b/src/Splitters/Abstractions/test/Resources/markdown_test_material.md
new file mode 100644
index 00000000..a5ef2b3c
--- /dev/null
+++ b/src/Splitters/Abstractions/test/Resources/markdown_test_material.md
@@ -0,0 +1,71 @@
+# Header A
+
+Text A
+
+## Header A.A
+
+Text A.A
+
+## Header A.B
+
+Text A.B
+
+### Header A.B.A
+
+Text A.B.A
+
+### Header A.B.B
+
+Text A.B.B
+
+### Header A.B.C
+
+Text A.B.C
+
+## Header A.C
+
+Text A.C
+
+### Header A.C.A
+
+Text A.C.A
+
+### Header A.C.B
+
+Text A.C.B
+
+# Header B
+
+Text B
+
+## Header B.A
+
+Text B.A
+
+## Header B.B
+
+Text B.B
+
+### Header B.B.A
+
+Text B.B.A
+
+### Header B.B.B
+
+Text B.B.B
+
+## Header B.C
+
+Text B.C
+
+### Header B.C.A
+
+Text B.C.A
+
+### Header B.C.B
+
+Text B.C.B
+
+### Header B.C.C
+
+Text B.C.C
\ No newline at end of file
diff --git a/src/Splitters/Abstractions/test/Tests.MarkdownHeader.cs b/src/Splitters/Abstractions/test/Tests.MarkdownHeader.cs
index 42f74eda..603fd88d 100644
--- a/src/Splitters/Abstractions/test/Tests.MarkdownHeader.cs
+++ b/src/Splitters/Abstractions/test/Tests.MarkdownHeader.cs
@@ -61,4 +61,69 @@ Hi this is Joe
res[0].Should().Be("Hi this is Jim\nHi this is Joe");
res[1].Should().Be("Hi this is Molly");
}
+
+ [Test]
+ public void TestMarkdown4() // Nested-header document: each chunk must begin with its full header path, then its body text.
+ {
+ var md = H.Resources.markdown_test_material_md.AsString(); // presumably the markdown_test_material.md test resource - confirm against the resource generator
+
+ var splitter = new MarkdownHeaderTextSplitter();
+ var res = splitter.SplitText(md);
+
+ res.Count.Should().Be(18); // one chunk expected per header section asserted below (res[0]..res[17])
+
+ res[0].Split("\n")[0].Should().Be("Header A"); // line 0 of a chunk: header path, levels joined with ": "
+ res[0].Split("\n")[1].Should().Be("Text A"); // line 1 of a chunk: the section's body text
+
+ res[1].Split("\n")[0].Should().Be("Header A: Header A.A");
+ res[1].Split("\n")[1].Should().Be("Text A.A");
+
+ res[2].Split("\n")[0].Should().Be("Header A: Header A.B"); // sibling at the same level replaces "Header A.A" in the path
+ res[2].Split("\n")[1].Should().Be("Text A.B");
+
+ res[3].Split("\n")[0].Should().Be("Header A: Header A.B: Header A.B.A");
+ res[3].Split("\n")[1].Should().Be("Text A.B.A");
+
+ res[4].Split("\n")[0].Should().Be("Header A: Header A.B: Header A.B.B");
+ res[4].Split("\n")[1].Should().Be("Text A.B.B");
+
+ res[5].Split("\n")[0].Should().Be("Header A: Header A.B: Header A.B.C");
+ res[5].Split("\n")[1].Should().Be("Text A.B.C");
+
+ res[6].Split("\n")[0].Should().Be("Header A: Header A.C"); // stepping back from level 3 to level 2: both deeper segments are dropped from the path
+ res[6].Split("\n")[1].Should().Be("Text A.C");
+
+ res[7].Split("\n")[0].Should().Be("Header A: Header A.C: Header A.C.A");
+ res[7].Split("\n")[1].Should().Be("Text A.C.A");
+
+ res[8].Split("\n")[0].Should().Be("Header A: Header A.C: Header A.C.B");
+ res[8].Split("\n")[1].Should().Be("Text A.C.B");
+
+ res[9].Split("\n")[0].Should().Be("Header B"); // stepping back from level 3 to level 1: the path restarts at the new top-level header
+ res[9].Split("\n")[1].Should().Be("Text B");
+
+ res[10].Split("\n")[0].Should().Be("Header B: Header B.A");
+ res[10].Split("\n")[1].Should().Be("Text B.A");
+
+ res[11].Split("\n")[0].Should().Be("Header B: Header B.B");
+ res[11].Split("\n")[1].Should().Be("Text B.B");
+
+ res[12].Split("\n")[0].Should().Be("Header B: Header B.B: Header B.B.A");
+ res[12].Split("\n")[1].Should().Be("Text B.B.A");
+
+ res[13].Split("\n")[0].Should().Be("Header B: Header B.B: Header B.B.B");
+ res[13].Split("\n")[1].Should().Be("Text B.B.B");
+
+ res[14].Split("\n")[0].Should().Be("Header B: Header B.C"); // level 3 back to level 2 again, this time under "Header B"
+ res[14].Split("\n")[1].Should().Be("Text B.C");
+
+ res[15].Split("\n")[0].Should().Be("Header B: Header B.C: Header B.C.A");
+ res[15].Split("\n")[1].Should().Be("Text B.C.A");
+
+ res[16].Split("\n")[0].Should().Be("Header B: Header B.C: Header B.C.B");
+ res[16].Split("\n")[1].Should().Be("Text B.C.B");
+
+ res[17].Split("\n")[0].Should().Be("Header B: Header B.C: Header B.C.C"); // final section; the fixture ends without a trailing newline
+ res[17].Split("\n")[1].Should().Be("Text B.C.C");
+ }
}
\ No newline at end of file
diff --git a/src/Splitters/CSharp/test/LangChain.Splitters.CSharp.Tests.csproj b/src/Splitters/CSharp/test/LangChain.Splitters.CSharp.Tests.csproj
index 9a8ca786..a544c7af 100644
--- a/src/Splitters/CSharp/test/LangChain.Splitters.CSharp.Tests.csproj
+++ b/src/Splitters/CSharp/test/LangChain.Splitters.CSharp.Tests.csproj
@@ -4,6 +4,10 @@
net9.0
+
+
+
+
diff --git a/src/Utilities/Postgres/test/LangChain.Utilities.Postgres.IntegrationTests.csproj b/src/Utilities/Postgres/test/LangChain.Utilities.Postgres.IntegrationTests.csproj
index 5ab5ea0b..6e9dcfed 100644
--- a/src/Utilities/Postgres/test/LangChain.Utilities.Postgres.IntegrationTests.csproj
+++ b/src/Utilities/Postgres/test/LangChain.Utilities.Postgres.IntegrationTests.csproj
@@ -6,6 +6,7 @@
+