Removes unnecessary newlines from rich text as JSON delivery API output (#19391)

* Removes unnecessary newlines from rich text as JSON delivery API output.

* Fix case from PR feedback.
# Conflicts:
#	src/Umbraco.Infrastructure/DeliveryApi/ApiRichTextElementParser.cs
#	tests/Umbraco.Tests.UnitTests/Umbraco.Core/DeliveryApi/RichTextParserTests.cs
This commit is contained in:
Andy Butland
2025-05-23 12:28:18 +02:00
parent b87ae6ce46
commit e6967a41cd
2 changed files with 61 additions and 3 deletions

View File

@@ -98,9 +98,9 @@ internal sealed class ApiRichTextElementParser : ApiRichTextParserBase, IApiRich
// - non-#comment nodes
// - non-#text nodes
// - non-empty #text nodes
// - empty #text between inline elements (see #17037)
// - empty #text between inline elements (see #17037) but not #text with only newlines (see #19388)
HtmlNode[] childNodes = element.ChildNodes
.Where(c => c.Name != CommentNodeName && (c.Name != TextNodeName || c.NextSibling is not null || string.IsNullOrWhiteSpace(c.InnerText) is false))
.Where(c => c.Name != CommentNodeName && (c.Name != TextNodeName || IsNonEmptyElement(c)))
.ToArray();
var tag = TagName(element);
@@ -121,6 +121,9 @@ internal sealed class ApiRichTextElementParser : ApiRichTextParserBase, IApiRich
return createElement(tag, attributes, childElements);
}
/// <summary>
/// Decides whether a node carries text worth keeping in the parsed output.
/// </summary>
/// <param name="htmlNode">The node whose inner text is inspected.</param>
/// <returns>
/// <c>true</c> when the inner text contains a non-whitespace character, or at least one character
/// other than a carriage return or line feed (for example a single space); <c>false</c> when the
/// inner text is empty or made up exclusively of carriage return / line feed characters.
/// </returns>
private static bool IsNonEmptyElement(HtmlNode htmlNode)
{
    string text = htmlNode.InnerText;

    // Anything with visible content is kept outright.
    if (string.IsNullOrWhiteSpace(text) is false)
    {
        return true;
    }

    // Whitespace-only text is kept unless every character is a newline character.
    return text.All(character => character is '\n' or '\r') is false;
}
/// <summary>
/// Gets the tag name to use for the supplied node, which is the node's own <c>Name</c>.
/// </summary>
/// <param name="htmlNode">The node to read the name from.</param>
/// <returns>The value of the node's <c>Name</c> property.</returns>
private static string TagName(HtmlNode htmlNode)
{
    return htmlNode.Name;
}
private void ReplaceLocalLinks(IPublishedContentCache contentCache, IPublishedMediaCache mediaCache, Dictionary<string, object> attributes)

View File

@@ -357,16 +357,71 @@ public class RichTextParserTests : PropertyValueConverterTests
Assert.IsEmpty(blockLevelBlock.Elements);
}
private const string TestParagraph = "What follows from <strong>here</strong> <em>is</em> <a href=\"#\">just</a> a bunch of text.";
/// <summary>
/// Parses the test paragraph wrapped in a single &lt;p&gt; tag and checks, via
/// <c>AssertTestParagraph</c>, that the resulting paragraph element has the expected children.
/// </summary>
[Test]
public void ParseElement_CanHandleWhitespaceAroundInlineElemements()
{
var parser = CreateRichTextElementParser();
// NOTE(review): the two consecutive "var element" statements below appear to be the removed and the
// added version of the same line in this diff view (the first inlines the text that the second takes
// from TestParagraph) - presumably only the second one exists in the compiled file; confirm.
var element = parser.Parse("<p>What follows from <strong>here</strong> <em>is</em> <a href=\"#\">just</a> a bunch of text.</p>", RichTextBlockModel.Empty) as RichTextRootElement;
var element = parser.Parse($"<p>{TestParagraph}</p>", RichTextBlockModel.Empty) as RichTextRootElement;
Assert.IsNotNull(element);
// The root is expected to hold exactly one child: the paragraph.
var paragraphElement = element.Elements.Single() as RichTextGenericElement;
Assert.IsNotNull(paragraphElement);
AssertTestParagraph(paragraphElement);
}
/// <summary>
/// Parses a table whose table, row and cell tags are separated by runs of newline characters and
/// checks that each level still has exactly one child, ending in a cell that matches the test paragraph.
/// </summary>
/// <param name="numberOfNewLineCharacters">How many times the newline sequence is repeated between tags.</param>
/// <param name="newlineCharacter">The newline sequence under test (LF, CR or CRLF).</param>
[TestCase(1, "\n")]
[TestCase(2, "\n")]
[TestCase(1, "\r")]
[TestCase(2, "\r")]
[TestCase(1, "\r\n")]
[TestCase(2, "\r\n")]
public void ParseElement_RemovesNewLinesAroundHtmlStructuralElements(int numberOfNewLineCharacters, string newlineCharacter)
{
    var parser = CreateRichTextElementParser();

    // Build the markup with the requested newline run between every structural tag.
    var separator = string.Concat(Enumerable.Repeat(newlineCharacter, numberOfNewLineCharacters));
    var html = $"<table>{separator}<tr>{separator}<td>{TestParagraph}</td>{separator}</tr>{separator}</table>";

    var root = parser.Parse(html, RichTextBlockModel.Empty) as RichTextRootElement;
    Assert.IsNotNull(root);

    // Single() fails the test if any newline-only text node survived as a sibling.
    var table = root.Elements.Single() as RichTextGenericElement;
    Assert.IsNotNull(table);

    var row = table.Elements.Single() as RichTextGenericElement;
    Assert.IsNotNull(row);

    var cell = row.Elements.Single() as RichTextGenericElement;
    Assert.IsNotNull(cell);

    AssertTestParagraph(cell);
}
/// <summary>
/// Parses a div holding four paragraphs separated by runs of newline characters and checks that the
/// div ends up with exactly four children, the first and last of which match the test paragraph.
/// </summary>
/// <param name="numberOfNewLineCharacters">How many times the newline sequence is repeated between paragraphs.</param>
/// <param name="newlineCharacter">The newline sequence under test (LF, CR or CRLF).</param>
[TestCase(1, "\n")]
[TestCase(2, "\n")]
[TestCase(1, "\r")]
[TestCase(2, "\r")]
[TestCase(1, "\r\n")]
[TestCase(2, "\r\n")]
public void ParseElement_RemovesNewLinesAroundHtmlContentElements(int numberOfNewLineCharacters, string newlineCharacter)
{
    var parser = CreateRichTextElementParser();

    var newLineSeparator = string.Concat(Enumerable.Repeat(newlineCharacter, numberOfNewLineCharacters));
    var element = parser.Parse($"<div><p>{TestParagraph}</p>{newLineSeparator}<p></p>{newLineSeparator}<p>&nbsp;</p>{newLineSeparator}<p>{TestParagraph}</p></div>", RichTextBlockModel.Empty) as RichTextRootElement;
    Assert.IsNotNull(element);

    var divElement = element.Elements.Single() as RichTextGenericElement;
    Assert.IsNotNull(divElement);

    // Materialize once so the sequence is not re-enumerated for the count and each lookup.
    var paragraphElements = divElement.Elements.ToArray();
    Assert.AreEqual(4, paragraphElements.Length);

    // Assert the casts explicitly so an unexpected element type fails here with a clear message
    // rather than as a null dereference inside AssertTestParagraph.
    var firstParagraph = paragraphElements[0] as RichTextGenericElement;
    Assert.IsNotNull(firstParagraph);
    AssertTestParagraph(firstParagraph);

    var lastParagraph = paragraphElements[^1] as RichTextGenericElement;
    Assert.IsNotNull(lastParagraph);
    AssertTestParagraph(lastParagraph);
}
private static void AssertTestParagraph(RichTextGenericElement paragraphElement)
{
var childElements = paragraphElement.Elements.ToArray();
Assert.AreEqual(7, childElements.Length);