From e6967a41cd4d9fdf286e08ba87c2736ee14501b3 Mon Sep 17 00:00:00 2001 From: Andy Butland Date: Fri, 23 May 2025 12:28:18 +0200 Subject: [PATCH] Removes unnecessary newlines from rich text as JSON delivery API output (#19391) * Removes unnecessary newlines from rich text as JSON delivery API output. * Fix case from PR feedback. # Conflicts: # src/Umbraco.Infrastructure/DeliveryApi/ApiRichTextElementParser.cs # tests/Umbraco.Tests.UnitTests/Umbraco.Core/DeliveryApi/RichTextParserTests.cs --- .../DeliveryApi/ApiRichTextElementParser.cs | 7 ++- .../DeliveryApi/RichTextParserTests.cs | 57 ++++++++++++++++++- 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/Umbraco.Infrastructure/DeliveryApi/ApiRichTextElementParser.cs b/src/Umbraco.Infrastructure/DeliveryApi/ApiRichTextElementParser.cs index 54dc6b0b85..b54976ff40 100644 --- a/src/Umbraco.Infrastructure/DeliveryApi/ApiRichTextElementParser.cs +++ b/src/Umbraco.Infrastructure/DeliveryApi/ApiRichTextElementParser.cs @@ -98,9 +98,9 @@ internal sealed class ApiRichTextElementParser : ApiRichTextParserBase, IApiRich // - non-#comment nodes // - non-#text nodes // - non-empty #text nodes - // - empty #text between inline elements (see #17037) + // - empty #text between inline elements (see #17037) but not #text with only newlines (see #19388) HtmlNode[] childNodes = element.ChildNodes - .Where(c => c.Name != CommentNodeName && (c.Name != TextNodeName || c.NextSibling is not null || string.IsNullOrWhiteSpace(c.InnerText) is false)) + .Where(c => c.Name != CommentNodeName && (c.Name != TextNodeName || IsNonEmptyElement(c))) .ToArray(); var tag = TagName(element); @@ -121,6 +121,9 @@ internal sealed class ApiRichTextElementParser : ApiRichTextParserBase, IApiRich return createElement(tag, attributes, childElements); } + private static bool IsNonEmptyElement(HtmlNode htmlNode) => + string.IsNullOrWhiteSpace(htmlNode.InnerText) is false || htmlNode.InnerText.Any(c => c != '\n' && c != '\r'); + private static string TagName(HtmlNode htmlNode) => htmlNode.Name; private void ReplaceLocalLinks(IPublishedContentCache contentCache, IPublishedMediaCache mediaCache, Dictionary attributes) diff --git a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/DeliveryApi/RichTextParserTests.cs b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/DeliveryApi/RichTextParserTests.cs index 51c6a21ecf..f113c9a998 100644 --- a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/DeliveryApi/RichTextParserTests.cs +++ b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/DeliveryApi/RichTextParserTests.cs @@ -357,16 +357,71 @@ public class RichTextParserTests : PropertyValueConverterTests Assert.IsEmpty(blockLevelBlock.Elements); } + private const string TestParagraph = "What follows from here is just a bunch of text."; + [Test] public void ParseElement_CanHandleWhitespaceAroundInlineElemements() { var parser = CreateRichTextElementParser(); - var element = parser.Parse("

What follows from here is just a bunch of text.

", RichTextBlockModel.Empty) as RichTextRootElement; + var element = parser.Parse($"

{TestParagraph}

", RichTextBlockModel.Empty) as RichTextRootElement; Assert.IsNotNull(element); var paragraphElement = element.Elements.Single() as RichTextGenericElement; Assert.IsNotNull(paragraphElement); + AssertTestParagraph(paragraphElement); + } + + [TestCase(1, "\n")] + [TestCase(2, "\n")] + [TestCase(1, "\r")] + [TestCase(2, "\r")] + [TestCase(1, "\r\n")] + [TestCase(2, "\r\n")] + public void ParseElement_RemovesNewLinesAroundHtmlStructuralElements(int numberOfNewLineCharacters, string newlineCharacter) + { + var parser = CreateRichTextElementParser(); + + var newLineSeparator = string.Concat(Enumerable.Repeat(newlineCharacter, numberOfNewLineCharacters)); + var element = parser.Parse($"{newLineSeparator}{newLineSeparator}{newLineSeparator}{newLineSeparator}
{TestParagraph}
", RichTextBlockModel.Empty) as RichTextRootElement; + Assert.IsNotNull(element); + var tableElement = element.Elements.Single() as RichTextGenericElement; + Assert.IsNotNull(tableElement); + + var rowElement = tableElement.Elements.Single() as RichTextGenericElement; + Assert.IsNotNull(rowElement); + + var cellElement = rowElement.Elements.Single() as RichTextGenericElement; + Assert.IsNotNull(cellElement); + + AssertTestParagraph(cellElement); + } + + [TestCase(1, "\n")] + [TestCase(2, "\n")] + [TestCase(1, "\r")] + [TestCase(2, "\r")] + [TestCase(1, "\r\n")] + [TestCase(2, "\r\n")] + public void ParseElement_RemovesNewLinesAroundHtmlContentElements(int numberOfNewLineCharacters, string newlineCharacter) + { + var parser = CreateRichTextElementParser(); + + var newLineSeparator = string.Concat(Enumerable.Repeat(newlineCharacter, numberOfNewLineCharacters)); + var element = parser.Parse($"

{TestParagraph}

{newLineSeparator}

{newLineSeparator}

 

{newLineSeparator}

{TestParagraph}

", RichTextBlockModel.Empty) as RichTextRootElement; + Assert.IsNotNull(element); + var divElement = element.Elements.Single() as RichTextGenericElement; + Assert.IsNotNull(divElement); + + var paragraphELements = divElement.Elements; + Assert.AreEqual(4, paragraphELements.Count()); + + AssertTestParagraph(paragraphELements.First() as RichTextGenericElement); + AssertTestParagraph(paragraphELements.Last() as RichTextGenericElement); + } + + private static void AssertTestParagraph(RichTextGenericElement paragraphElement) + { var childElements = paragraphElement.Elements.ToArray(); Assert.AreEqual(7, childElements.Length);