diff --git a/applications/evaluation/Evaluators/Faithfulness/FaithfulnessEvaluator.cs b/applications/evaluation/Evaluators/Faithfulness/FaithfulnessEvaluator.cs index 6cf2e643b..4a7c83a39 100644 --- a/applications/evaluation/Evaluators/Faithfulness/FaithfulnessEvaluator.cs +++ b/applications/evaluation/Evaluators/Faithfulness/FaithfulnessEvaluator.cs @@ -59,7 +59,7 @@ internal async Task Evaluate(MemoryAnswer answer, Dictionary c.Partitions.Select(p => p.Text))) }, + { "context", string.Join('\n', answer.RelevantSources.SelectMany(c => c.Partitions.Select(p => p.Text))) }, { "answer", answer.Result }, { "statements", JsonSerializer.Serialize(extraction) } }).ConfigureAwait(false); diff --git a/applications/evaluation/Evaluators/Relevance/RelevanceEvaluator.cs b/applications/evaluation/Evaluators/Relevance/RelevanceEvaluator.cs index 00d9e0f22..d8f2de3c0 100644 --- a/applications/evaluation/Evaluators/Relevance/RelevanceEvaluator.cs +++ b/applications/evaluation/Evaluators/Relevance/RelevanceEvaluator.cs @@ -1,6 +1,5 @@ // Copyright (c) Microsoft. All rights reserved. -using System; using System.Collections.Generic; using System.Linq; using System.Numerics.Tensors; @@ -68,7 +67,7 @@ private async IAsyncEnumerable GetEvaluations(MemoryAnswer { var extraction = await this.ExtractQuestion.InvokeAsync(this._kernel, new KernelArguments { - { "context", string.Join(Environment.NewLine, answer.RelevantSources.SelectMany(c => c.Partitions.Select(p => p.Text))) }, + { "context", string.Join('\n', answer.RelevantSources.SelectMany(c => c.Partitions.Select(p => p.Text))) }, { "answer", answer.Result } }).ConfigureAwait(false); diff --git a/extensions/Chunkers/Chunkers/MarkDownChunker.cs b/extensions/Chunkers/Chunkers/MarkDownChunker.cs index b974809c8..0f9966361 100644 --- a/extensions/Chunkers/Chunkers/MarkDownChunker.cs +++ b/extensions/Chunkers/Chunkers/MarkDownChunker.cs @@ -12,6 +12,7 @@ using Microsoft.KernelMemory.AI; using Microsoft.KernelMemory.Chunkers.internals; using Microsoft.KernelMemory.DataFormats; +using Microsoft.KernelMemory.Text; namespace Microsoft.KernelMemory.Chunkers; @@ -152,10 +153,7 @@ public List Split(string text, MarkDownChunkerOptions options) ArgumentNullException.ThrowIfNull(options); // Clean up text. Note: LLMs don't use \r char - text = text - .Replace("\r\n", "\n", StringComparison.OrdinalIgnoreCase) - .Replace("\r", "\n", StringComparison.OrdinalIgnoreCase) - .Trim(); + text = text.NormalizeNewlines(true); // Calculate chunk size leaving room for the optional chunk header int maxChunk1Size = options.MaxTokensPerChunk - this.TokenCount(options.ChunkHeader); diff --git a/extensions/Chunkers/Chunkers/PlainTextChunker.cs b/extensions/Chunkers/Chunkers/PlainTextChunker.cs index 4e0a6ea90..911bfeb62 100644 --- a/extensions/Chunkers/Chunkers/PlainTextChunker.cs +++ b/extensions/Chunkers/Chunkers/PlainTextChunker.cs @@ -12,6 +12,7 @@ using Microsoft.KernelMemory.AI; using Microsoft.KernelMemory.Chunkers.internals; using Microsoft.KernelMemory.DataFormats; +using Microsoft.KernelMemory.Text; namespace Microsoft.KernelMemory.Chunkers; @@ -127,10 +128,7 @@ public List Split(string text, PlainTextChunkerOptions options) ArgumentNullException.ThrowIfNull(options); // Clean up text. Note: LLMs don't use \r char - text = text - .Replace("\r\n", "\n", StringComparison.OrdinalIgnoreCase) - .Replace("\r", "\n", StringComparison.OrdinalIgnoreCase) - .Trim(); + text = text.NormalizeNewlines(true); // Calculate chunk size leaving room for the optional chunk header int maxChunk1Size = options.MaxTokensPerChunk - this.TokenCount(options.ChunkHeader); diff --git a/extensions/Postgres/Postgres/PostgresMemory.cs b/extensions/Postgres/Postgres/PostgresMemory.cs index 40e119262..135034e18 100644 --- a/extensions/Postgres/Postgres/PostgresMemory.cs +++ b/extensions/Postgres/Postgres/PostgresMemory.cs @@ -13,6 +13,7 @@ using Microsoft.KernelMemory.AI; using Microsoft.KernelMemory.Diagnostics; using Microsoft.KernelMemory.MemoryStorage; +using Microsoft.KernelMemory.Text; using Pgvector; namespace Microsoft.KernelMemory.Postgres; @@ -272,7 +273,7 @@ private static string NormalizeTableNamePrefix(string? name) foreach (MemoryFilter filter in filters.Where(f => !f.IsEmpty())) { var andSql = new StringBuilder(); - andSql.AppendLine("("); + andSql.AppendLineNix("("); if (filter is PostgresMemoryFilter) { @@ -298,10 +299,10 @@ private static string NormalizeTableNamePrefix(string? name) // $"{PostgresSchema.PlaceholdersTags} @> " + safeSqlPlaceholder // $"{PostgresSchema.PlaceholdersTags} @> " + safeSqlPlaceholder + "::text[]" // $"{PostgresSchema.PlaceholdersTags} @> ARRAY[" + safeSqlPlaceholder + "]::text[]" - andSql.AppendLine($"{PostgresSchema.PlaceholdersTags} @> " + safeSqlPlaceholder); + andSql.AppendLineNix($"{PostgresSchema.PlaceholdersTags} @> " + safeSqlPlaceholder); } - andSql.AppendLine(")"); + andSql.AppendLineNix(")"); orConditions.Add(andSql.ToString()); } diff --git a/service/Abstractions/HTTP/SSE.cs b/service/Abstractions/HTTP/SSE.cs index f9786d96b..9ae751bc2 100644 --- a/service/Abstractions/HTTP/SSE.cs +++ b/service/Abstractions/HTTP/SSE.cs @@ -8,6 +8,7 @@ using System.Text; using System.Text.Json; using System.Threading; +using Microsoft.KernelMemory.Text; namespace Microsoft.KernelMemory.HTTP; @@ -39,7 +40,7 @@ public async static IAsyncEnumerable ParseStreamAsync( } else { - buffer.AppendLine(line); + buffer.AppendLineNix(line); } } diff --git a/service/Abstractions/Models/MemoryAnswer.cs b/service/Abstractions/Models/MemoryAnswer.cs index cde6f7177..ba542157e 100644 --- a/service/Abstractions/Models/MemoryAnswer.cs +++ b/service/Abstractions/Models/MemoryAnswer.cs @@ -6,6 +6,7 @@ using System.Text; using System.Text.Json; using System.Text.Json.Serialization; +using Microsoft.KernelMemory.Text; namespace Microsoft.KernelMemory; @@ -92,7 +93,7 @@ public string ToJson(bool optimizeForStream) public override string ToString() { var result = new StringBuilder(); - result.AppendLine(this.Result); + result.AppendLineNix(this.Result); if (!this.NoResult && this.RelevantSources is { Count: > 0 }) { @@ -103,8 +104,8 @@ public override string ToString() sources[x.Index + x.Link] = $" - {x.SourceName} [{date}]"; } - result.AppendLine("- Sources:"); - result.AppendLine(string.Join("\n", sources.Values)); + result.AppendLineNix("- Sources:"); + result.AppendLineNix(string.Join("\n", sources.Values)); } return result.ToString(); diff --git a/service/Abstractions/Text/StringBuilderExtensions.cs b/service/Abstractions/Text/StringBuilderExtensions.cs new file mode 100644 index 000000000..fe3bcf43e --- /dev/null +++ b/service/Abstractions/Text/StringBuilderExtensions.cs @@ -0,0 +1,43 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text; + +namespace Microsoft.KernelMemory.Text; + +public static class StringBuilderExtensions +{ + /// + /// Append line using Unix line ending "\n" + /// + public static void AppendLineNix(this StringBuilder sb) + { + sb.Append('\n'); + } + + /// + /// Append line using Unix line ending "\n" + /// + public static void AppendLineNix(this StringBuilder sb, string value) + { + sb.Append(value); + sb.Append('\n'); + } + + /// + /// Append line using Unix line ending "\n" + /// + public static void AppendLineNix(this StringBuilder sb, char value) + { + sb.Append(value); + sb.Append('\n'); + } + + /// + /// Append line using Unix line ending "\n" + /// + public static void AppendLineNix(this StringBuilder sb, StringBuilder value) + { + sb.Append(value); + sb.Append('\n'); + } +} diff --git a/service/Abstractions/Text/StringExtensions.cs b/service/Abstractions/Text/StringExtensions.cs new file mode 100644 index 000000000..08a92adba --- /dev/null +++ b/service/Abstractions/Text/StringExtensions.cs @@ -0,0 +1,67 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.KernelMemory.Text; + +public static class StringExtensions +{ + public static string NormalizeNewlines(this string text, bool trim = false) + { + if (string.IsNullOrEmpty(text)) + { + return text; + } + + // We won't need more than the original length + char[] buffer = new char[text.Length]; + int bufferPos = 0; + + // Skip leading whitespace if trimming + int i = 0; + if (trim) + { + while (i < text.Length && char.IsWhiteSpace(text[i])) { i++; } + } + + // Tracks the last non-whitespace position written into buffer + int lastNonWhitespacePos = -1; + + // Single pass: replace \r\n or \r with \n, record last non-whitespace + for (; i < text.Length; i++) + { + char c = text[i]; + + if (c == '\r') + { + // If \r\n then skip the \n + if (i + 1 < text.Length && text[i + 1] == '\n') { i++; } + + // Write a single \n + buffer[bufferPos] = '\n'; + } + else + { + buffer[bufferPos] = c; + } + + // If trimming, update lastNonWhitespacePos only when char isn't whitespace + // If not trimming, always update because we keep everything + if (!trim || !char.IsWhiteSpace(buffer[bufferPos])) + { + lastNonWhitespacePos = bufferPos; + } + + bufferPos++; + } + + // Cut off trailing whitespace if trimming + // If every char was whitespace, lastNonWhitespacePos stays -1 and the result is an empty string + int finalLength = (trim && lastNonWhitespacePos >= 0) + ? lastNonWhitespacePos + 1 + : bufferPos; + + // Safety check if everything was trimmed away + if (finalLength < 0) { finalLength = 0; } + + return new string(buffer, 0, finalLength); + } +} diff --git a/service/Core/DataFormats/Image/ImageDecoder.cs b/service/Core/DataFormats/Image/ImageDecoder.cs index 8b38e591e..6cd070bf4 100644 --- a/service/Core/DataFormats/Image/ImageDecoder.cs +++ b/service/Core/DataFormats/Image/ImageDecoder.cs @@ -8,6 +8,7 @@ using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.Diagnostics; using Microsoft.KernelMemory.Pipeline; +using Microsoft.KernelMemory.Text; namespace Microsoft.KernelMemory.DataFormats.Image; @@ -64,7 +65,7 @@ public async Task DecodeAsync(Stream data, CancellationToken cancel var result = new FileContent(MimeTypes.PlainText); var content = await this.ImageToTextAsync(data, cancellationToken).ConfigureAwait(false); - result.Sections.Add(new(content.Trim(), 1, Chunk.Meta(sentencesAreComplete: true))); + result.Sections.Add(new(content, 1, Chunk.Meta(sentencesAreComplete: true))); return result; } @@ -87,10 +88,14 @@ private async Task ImageToTextAsync(BinaryData data, CancellationToken c } } - private Task ImageToTextAsync(Stream data, CancellationToken cancellationToken = default) + private async Task ImageToTextAsync(Stream data, CancellationToken cancellationToken = default) { - return this._ocrEngine is null - ? throw new NotSupportedException($"Image extraction not configured") - : this._ocrEngine.ExtractTextFromImageAsync(data, cancellationToken); + if (this._ocrEngine is null) + { + throw new NotSupportedException($"Image extraction not configured"); + } + + string text = await this._ocrEngine.ExtractTextFromImageAsync(data, cancellationToken).ConfigureAwait(false); + return text.NormalizeNewlines(true); } } diff --git a/service/Core/DataFormats/Office/MsExcelDecoder.cs b/service/Core/DataFormats/Office/MsExcelDecoder.cs index f7f04e82b..bea867811 100644 --- a/service/Core/DataFormats/Office/MsExcelDecoder.cs +++ b/service/Core/DataFormats/Office/MsExcelDecoder.cs @@ -11,6 +11,7 @@ using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.Diagnostics; using Microsoft.KernelMemory.Pipeline; +using Microsoft.KernelMemory.Text; namespace Microsoft.KernelMemory.DataFormats.Office; @@ -61,7 +62,7 @@ public Task DecodeAsync(Stream data, CancellationToken cancellation worksheetNumber++; if (this._config.WithWorksheetNumber) { - sb.AppendLine(this._config.WorksheetNumberTemplate.Replace("{number}", $"{worksheetNumber}", StringComparison.OrdinalIgnoreCase)); + sb.AppendLineNix(this._config.WorksheetNumberTemplate.Replace("{number}", $"{worksheetNumber}", StringComparison.OrdinalIgnoreCase)); } var rowsUsed = worksheet.RangeUsed()?.RowsUsed(); @@ -142,15 +143,15 @@ public Task DecodeAsync(Stream data, CancellationToken cancellation } } - sb.AppendLine(this._config.RowSuffix); + sb.AppendLineNix(this._config.RowSuffix); } if (this._config.WithEndOfWorksheetMarker) { - sb.AppendLine(this._config.EndOfWorksheetMarkerTemplate.Replace("{number}", $"{worksheetNumber}", StringComparison.OrdinalIgnoreCase)); + sb.AppendLineNix(this._config.EndOfWorksheetMarkerTemplate.Replace("{number}", $"{worksheetNumber}", StringComparison.OrdinalIgnoreCase)); } - string worksheetContent = sb.ToString().Trim(); + string worksheetContent = sb.ToString().NormalizeNewlines(true); sb.Clear(); result.Sections.Add(new Chunk(worksheetContent, worksheetNumber, Chunk.Meta(sentencesAreComplete: true))); } diff --git a/service/Core/DataFormats/Office/MsPowerPointDecoder.cs b/service/Core/DataFormats/Office/MsPowerPointDecoder.cs index 4118fda02..2be7b5c67 100644 --- a/service/Core/DataFormats/Office/MsPowerPointDecoder.cs +++ b/service/Core/DataFormats/Office/MsPowerPointDecoder.cs @@ -13,6 +13,7 @@ using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.Diagnostics; using Microsoft.KernelMemory.Pipeline; +using Microsoft.KernelMemory.Text; namespace Microsoft.KernelMemory.DataFormats.Office; @@ -99,20 +100,20 @@ public Task DecodeAsync(Stream data, CancellationToken cancellation // Prepend slide number before the slide text if (this._config.WithSlideNumber) { - sb.AppendLine(this._config.SlideNumberTemplate.Replace("{number}", $"{slideNumber}", StringComparison.OrdinalIgnoreCase)); + sb.AppendLineNix(this._config.SlideNumberTemplate.Replace("{number}", $"{slideNumber}", StringComparison.OrdinalIgnoreCase)); } sb.Append(currentSlideContent); - sb.AppendLine(); + sb.AppendLineNix(); // Append the end of slide marker if (this._config.WithEndOfSlideMarker) { - sb.AppendLine(this._config.EndOfSlideMarkerTemplate.Replace("{number}", $"{slideNumber}", StringComparison.OrdinalIgnoreCase)); + sb.AppendLineNix(this._config.EndOfSlideMarkerTemplate.Replace("{number}", $"{slideNumber}", StringComparison.OrdinalIgnoreCase)); } } - string slideContent = sb.ToString().Trim(); + string slideContent = sb.ToString().NormalizeNewlines(true); sb.Clear(); result.Sections.Add(new Chunk(slideContent, slideNumber, Chunk.Meta(sentencesAreComplete: true))); } diff --git a/service/Core/DataFormats/Office/MsWordDecoder.cs b/service/Core/DataFormats/Office/MsWordDecoder.cs index 5f08de347..7b6c1a1b1 100644 --- a/service/Core/DataFormats/Office/MsWordDecoder.cs +++ b/service/Core/DataFormats/Office/MsWordDecoder.cs @@ -12,6 +12,7 @@ using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.Diagnostics; using Microsoft.KernelMemory.Pipeline; +using Microsoft.KernelMemory.Text; namespace Microsoft.KernelMemory.DataFormats.Office; @@ -79,17 +80,19 @@ public Task DecodeAsync(Stream data, CancellationToken cancellation var lastRenderedPageBreak = p.GetFirstChild()?.GetFirstChild(); if (lastRenderedPageBreak != null) { - string pageContent = sb.ToString().Trim(); + // Note: no trimming, use original spacing when working with pages + string pageContent = sb.ToString().NormalizeNewlines(false); sb.Clear(); result.Sections.Add(new Chunk(pageContent, pageNumber, Chunk.Meta(sentencesAreComplete: true))); pageNumber++; } - sb.AppendLine(p.InnerText); + sb.AppendLineNix(p.InnerText); } } - var lastPageContent = sb.ToString().Trim(); + // Note: no trimming, use original spacing when working with pages + string lastPageContent = sb.ToString().NormalizeNewlines(false); result.Sections.Add(new Chunk(lastPageContent, pageNumber, Chunk.Meta(sentencesAreComplete: true))); return Task.FromResult(result); diff --git a/service/Core/DataFormats/Pdf/PdfDecoder.cs b/service/Core/DataFormats/Pdf/PdfDecoder.cs index 4a4185899..736d9017e 100644 --- a/service/Core/DataFormats/Pdf/PdfDecoder.cs +++ b/service/Core/DataFormats/Pdf/PdfDecoder.cs @@ -9,6 +9,7 @@ using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.Diagnostics; using Microsoft.KernelMemory.Pipeline; +using Microsoft.KernelMemory.Text; using UglyToad.PdfPig; using UglyToad.PdfPig.Content; using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor; @@ -56,8 +57,9 @@ public Task DecodeAsync(Stream data, CancellationToken cancellation foreach (Page? page in pdfDocument.GetPages().Where(x => x != null)) { - // Note: no trimming, use original spacing - string pageContent = ContentOrderTextExtractor.GetText(page) ?? string.Empty; + // Note: no trimming, use original spacing when working with pages + string pageContent = ContentOrderTextExtractor.GetText(page).NormalizeNewlines(false) ?? string.Empty; + result.Sections.Add(new Chunk(pageContent, page.Number, Chunk.Meta(sentencesAreComplete: false))); } diff --git a/service/Core/DataFormats/WebPages/HtmlDecoder.cs b/service/Core/DataFormats/WebPages/HtmlDecoder.cs index 45bd67fe7..23bfcf3d9 100644 --- a/service/Core/DataFormats/WebPages/HtmlDecoder.cs +++ b/service/Core/DataFormats/WebPages/HtmlDecoder.cs @@ -9,6 +9,7 @@ using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.Diagnostics; using Microsoft.KernelMemory.Pipeline; +using Microsoft.KernelMemory.Text; namespace Microsoft.KernelMemory.DataFormats.WebPages; @@ -51,7 +52,7 @@ public Task DecodeAsync(Stream data, CancellationToken cancellation var doc = new HtmlDocument(); doc.Load(data); - result.Sections.Add(new Chunk(doc.DocumentNode.InnerText.Trim(), 1, Chunk.Meta(sentencesAreComplete: true))); + result.Sections.Add(new Chunk(doc.DocumentNode.InnerText.NormalizeNewlines(true), 1, Chunk.Meta(sentencesAreComplete: true))); return Task.FromResult(result); } diff --git a/service/Core/Handlers/SummarizationHandler.cs b/service/Core/Handlers/SummarizationHandler.cs index a01daf1ea..042f04f84 100644 --- a/service/Core/Handlers/SummarizationHandler.cs +++ b/service/Core/Handlers/SummarizationHandler.cs @@ -13,6 +13,7 @@ using Microsoft.KernelMemory.Extensions; using Microsoft.KernelMemory.Pipeline; using Microsoft.KernelMemory.Prompts; +using Microsoft.KernelMemory.Text; namespace Microsoft.KernelMemory.Handlers; @@ -214,7 +215,7 @@ public SummarizationHandler( newContent.Append(token); } - newContent.AppendLine(); + newContent.AppendLineNix(); } content = newContent.ToString(); diff --git a/service/Core/Handlers/SummarizationParallelHandler.cs b/service/Core/Handlers/SummarizationParallelHandler.cs index 04f5b28d7..dfcf3c3d9 100644 --- a/service/Core/Handlers/SummarizationParallelHandler.cs +++ b/service/Core/Handlers/SummarizationParallelHandler.cs @@ -12,6 +12,7 @@ using Microsoft.KernelMemory.Extensions; using Microsoft.KernelMemory.Pipeline; using Microsoft.KernelMemory.Prompts; +using Microsoft.KernelMemory.Text; namespace Microsoft.KernelMemory.Handlers; @@ -188,7 +189,7 @@ await Parallel.ForEachAsync(uploadedFile.GeneratedFiles, options, async (generat newContent.Append(token); } - newContent.AppendLine(); + newContent.AppendLineNix(); } content = newContent.ToString(); diff --git a/service/Core/Handlers/TextExtractionHandler.cs b/service/Core/Handlers/TextExtractionHandler.cs index 05279ea2b..a9ffb9bb6 100644 --- a/service/Core/Handlers/TextExtractionHandler.cs +++ b/service/Core/Handlers/TextExtractionHandler.cs @@ -12,6 +12,7 @@ using Microsoft.KernelMemory.DataFormats.WebPages; using Microsoft.KernelMemory.Diagnostics; using Microsoft.KernelMemory.Pipeline; +using Microsoft.KernelMemory.Text; namespace Microsoft.KernelMemory.Handlers; @@ -224,8 +225,8 @@ public void Dispose() // Add a clean page separation if (section.SentencesAreComplete) { - textBuilder.AppendLine(); - textBuilder.AppendLine(); + textBuilder.AppendLineNix(); + textBuilder.AppendLineNix(); } } diff --git a/service/tests/Abstractions.UnitTests/Text/StringExtensionsTest.cs b/service/tests/Abstractions.UnitTests/Text/StringExtensionsTest.cs new file mode 100644 index 000000000..94203e594 --- /dev/null +++ b/service/tests/Abstractions.UnitTests/Text/StringExtensionsTest.cs @@ -0,0 +1,79 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.KernelMemory.Text; + +namespace Microsoft.KM.Abstractions.UnitTests.Text; + +public class StringExtensionsTest +{ + [Theory] + [Trait("Category", "UnitTest")] + [InlineData(null, null)] + [InlineData("", "")] + [InlineData(" ", " ")] + [InlineData("\n", "\n")] + [InlineData("\r", "\n")] // Old Mac + [InlineData("\r\n", "\n")] // Windows + [InlineData("\n\r", "\n\n")] // Not standard, that's 2 line endings + [InlineData("\n\n\n", "\n\n\n")] + [InlineData("\r\r\r", "\n\n\n")] + [InlineData("\r\r\n\r", "\n\n\n")] + [InlineData("\n\r\n\r", "\n\n\n")] + [InlineData("ciao", "ciao")] + [InlineData("ciao ", "ciao ")] + [InlineData(" ciao ", " ciao ")] + [InlineData("\r ciao ", "\n ciao ")] + [InlineData(" \rciao ", " \nciao ")] + [InlineData(" \r\nciao ", " \nciao ")] + [InlineData(" \r\nciao\n ", " \nciao\n ")] + [InlineData(" \r\nciao \n", " \nciao \n")] + [InlineData(" \r\nciao \r", " \nciao \n")] + [InlineData(" \r\nciao \rn", " \nciao \nn")] + public void ItNormalizesLineEndings(string? input, string? expected) + { + // Act +#pragma warning disable CS8604 // it's an extension method, internally it handles the null scenario + string actual = input.NormalizeNewlines(); +#pragma warning restore CS8604 + + // Assert + Assert.Equal(expected, actual); + } + + [Theory] + [Trait("Category", "UnitTest")] + [InlineData(null, null)] + [InlineData("", "")] + [InlineData(" ", "")] + [InlineData("\n", "")] + [InlineData("\r", "")] + [InlineData("\r\n", "")] + [InlineData("\n\r", "")] + [InlineData("\n\n\n", "")] + [InlineData("\r\r\r", "")] + [InlineData("\r\r\n\r", "")] + [InlineData("\n\r\n\r", "")] + [InlineData("ciao", "ciao")] + [InlineData("ciao ", "ciao")] + [InlineData(" ciao ", "ciao")] + [InlineData("\r ciao ", "ciao")] + [InlineData(" \rciao ", "ciao")] + [InlineData(" \r\nciao ", "ciao")] + [InlineData(" \r\nciao\n ", "ciao")] + [InlineData(" \r\nciao \n", "ciao")] + [InlineData(" \r\nciao \r", "ciao")] + [InlineData(" \r\nciao \rn", "ciao \nn")] + [InlineData(" \r\nc\ri\ra\no \r", "c\ni\na\no")] + [InlineData(" \r\nc\r\ni\n\na\r\ro \r", "c\ni\n\na\n\no")] + [InlineData(" \r\nccc\r\ni\n\naaa\r\ro \r", "ccc\ni\n\naaa\n\no")] + public void ItCanTrimWhileNormalizingLineEndings(string? input, string? expected) + { + // Act +#pragma warning disable CS8604 // it's an extension method, internally it handles the null scenario + string actual = input.NormalizeNewlines(true); +#pragma warning restore CS8604 + + // Assert + Assert.Equal(expected, actual); + } +}