diff --git a/Common/Algorithm.cs b/Common/Algorithm.cs
new file mode 100644
index 0000000..5ca36c2
--- /dev/null
+++ b/Common/Algorithm.cs
@@ -0,0 +1,68 @@
+using System;
+using System.Text;
+
+namespace Common
+{
+    public static class Algorithm
+    {
+        /// <summary>UTF-8 bytes of <see cref="Environment.NewLine"/>.</summary>
+        public static readonly byte[] EolUtf8Bytes = Encoding.UTF8.GetBytes(Environment.NewLine);
+
+        /// <summary>
+        /// Finds the first occurrence of a byte subsequence inside another byte array.
+        /// </summary>
+        /// <param name="buffer">Array to search in.</param>
+        /// <param name="searchedBytes">Byte sequence to look for.</param>
+        /// <returns>
+        /// Offset of the first occurrence, or -1 if the subsequence was not found.
+        /// An empty <paramref name="searchedBytes"/> is reported as not found.
+        /// </returns>
+        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
+        public static int FindByteSubsequence(byte[] buffer, byte[] searchedBytes)
+        {
+            if (buffer == null)
+            {
+                throw new ArgumentNullException("buffer");
+            }
+
+            if (searchedBytes == null)
+            {
+                throw new ArgumentNullException("searchedBytes");
+            }
+
+            // Last start offset at which the whole pattern still fits. The bound is
+            // inclusive so a match ending exactly at the end of the buffer is found.
+            var lastStart = buffer.Length - searchedBytes.Length;
+            for (var i = 0; i <= lastStart; ++i)
+            {
+                // Stays false for an empty pattern, which is therefore never "found".
+                var match = false;
+                for (var j = 0; j < searchedBytes.Length; ++j)
+                {
+                    match = buffer[i + j] == searchedBytes[j];
+                    if (!match)
+                    {
+                        break;
+                    }
+                }
+
+                if (match)
+                {
+                    return i;
+                }
+            }
+
+            return -1;
+        }
+
+        /// <summary>
+        /// Finds the first occurrence of <see cref="EolUtf8Bytes"/> in the buffer.
+        /// </summary>
+        /// <param name="buffer">Array to search in.</param>
+        /// <returns>Offset of the line terminator, or -1 if the buffer contains none.</returns>
+        public static int FindEolOffset(byte[] buffer)
+        {
+            return FindByteSubsequence(buffer, EolUtf8Bytes);
+        }
+    }
+}
diff --git a/Common/Common.csproj b/Common/Common.csproj
index 2a3f25d..3b3056a 100644
--- a/Common/Common.csproj
+++ b/Common/Common.csproj
@@ -41,9 +41,11 @@
+
+
diff --git a/Common/Constants.cs b/Common/Constants.cs
index 551ce2d..8d0444c 100644
--- a/Common/Constants.cs
+++ b/Common/Constants.cs
@@ -2,8 +2,8 @@
 {
     public static class Constants
     {
-        public const uint Mb = 1024 * 1024;
+        public const uint MB = 1024 * 1024;
 
-        public const uint Gb = 1024 * Mb;
+        public const uint GB = 1024 * MB;
     }
 }
diff --git a/Common/OffsetLength.cs b/Common/OffsetLength.cs
new file mode 100644
index 0000000..43a2988
--- /dev/null
+++ b/Common/OffsetLength.cs
@@ -0,0 +1,9 @@
+namespace Common
+{
+    public class OffsetLength<T>
+    {
+        public T Offset { get; set; }
+
+        public T Length { get; set; }
+    }
+}
diff --git a/DataGenerator/Program.cs b/DataGenerator/Program.cs
index fb18c13..92df968 100644
--- a/DataGenerator/Program.cs
+++ b/DataGenerator/Program.cs
@@ -13,7 +13,7 @@ namespace DataGenerator
 {
     public class Program
     {
-        private const ulong DafaultSize = Constants.Mb;
+        private const ulong DafaultSize = Constants.MB;
         private const int OutFileBuffer = 128 * 1024;
         private const int ChunkSize = 128;
 
@@ -47,11 +47,11 @@ static void Main(string[] args)
             var lowArg = args[0].ToLowerInvariant();
             if (lowArg.EndsWith("mb"))
             {
-                outSize = (ulong)size * Constants.Mb;
+                outSize = (ulong)size * Constants.MB;
             }
             else if (lowArg.EndsWith("gb"))
             {
-                outSize = (ulong)size * Constants.Gb;
+                outSize = (ulong)size * Constants.GB;
             }
             else
             {
diff --git a/ExternalSort.Tests/Generic.cs b/ExternalSort.Tests/Generic.cs
index 4e89f98..ffd385c 100644
--- a/ExternalSort.Tests/Generic.cs
+++ b/ExternalSort.Tests/Generic.cs
@@ -83,5 +83,30 @@ public void TestBadLinesCopmparer()
             Assert.AreEqual("3. Apple", lines.First());
             Assert.AreEqual("End!", lines.Last());
         }
+
+        [TestMethod]
+        public void TestFindSubByte()
+        {
+            var hello = "Hello";
+            var input = hello + Environment.NewLine + "!";
+
+            var endlineBytes = Encoding.UTF8.GetBytes(Environment.NewLine);
+            var inputBytes = Encoding.UTF8.GetBytes(input);
+
+            var found = Algorithm.FindByteSubsequence(inputBytes, endlineBytes);
+            Assert.AreEqual(hello.Length, found);
+        }
+
+        [TestMethod]
+        public void TestFindSubByteAtEndOfBuffer()
+        {
+            // Regression: a match ending exactly at the end of the buffer must be found.
+            var hello = "Hello";
+            var endlineBytes = Encoding.UTF8.GetBytes(Environment.NewLine);
+            var inputBytes = Encoding.UTF8.GetBytes(hello + Environment.NewLine);
+
+            var found = Algorithm.FindByteSubsequence(inputBytes, endlineBytes);
+            Assert.AreEqual(hello.Length, found);
+        }
     }
 }
diff --git a/ExternalSort/App.config b/ExternalSort/App.config
index ea019a8..78cb1f0 100644
--- a/ExternalSort/App.config
+++ b/ExternalSort/App.config
@@ -5,5 +5,6 @@
+
diff --git a/ExternalSort/LineComparer.cs b/ExternalSort/LineComparer.cs
index f98a007..7acae7d 100644
--- a/ExternalSort/LineComparer.cs
+++ b/ExternalSort/LineComparer.cs
@@ -7,7 +7,7 @@ namespace ExternalSort
     public class LineComparer : IComparer
     {
         ///
-        /// This method tries hart to follow the spec. rules but nevertheless it never fails.
+        /// This method tries hard to follow the specs but nevertheless it never fails.
         ///
         ///
         ///
diff --git a/ExternalSort/Program.cs b/ExternalSort/Program.cs
index 59e29ab..4889a2e 100644
--- a/ExternalSort/Program.cs
+++ b/ExternalSort/Program.cs
@@ -39,14 +39,20 @@ static void Main(string[] args)
             var appSettings = ConfigurationManager.AppSettings;
             var deflate = appSettings["DeflateTemp"];
+            var maxMem = appSettings["MaxMemoryUsageBytes"];
+            var st = new Settings
+            {
+                OrdinalStringSortOrder = option.StartsWith("/ord"),
+                DeflateTempFiles = deflate == "true",
+            };
 
-            var ms = new MergeSort(
-                new Settings
-                {
-                    OrdinalStringSortOrder = option.StartsWith("/ord"),
-                    DeflateTempFiles = deflate == "true",
-                });
+            ulong mmem;
+            if (ulong.TryParse(maxMem, out mmem))
+            {
+                st.MaxMemoryUsageBytes = mmem;
+            }
 
+            var ms = new MergeSort(st);
             ms.MergeSortFile(imputFile, outputFile).Wait();
         }
     }
 }
diff --git a/ExternalSort/Settings.cs b/ExternalSort/Settings.cs
index 0de4c59..71eb5bc 100644
--- a/ExternalSort/Settings.cs
+++ b/ExternalSort/Settings.cs
@@ -1,4 +1,5 @@
 using System;
+using Common;
 
 namespace ExternalSort
 {
@@ -6,8 +7,8 @@ public class Settings
     {
         public Settings()
         {
-            //512 MB
-            MaxMemoryUsageBytes = 1024 * 1024 * 1024;
+            //2 GB will work for 32 bit systems!
+            MaxMemoryUsageBytes = 2 * Constants.GB;
 
             // Safe value
             MaxQueueRecords = 1000;