Cherry picked #19540 to V16 (and fixed changed signatures) (#19592)

This commit is contained in:
Kenn Jacobsen
2025-06-26 09:16:49 +02:00
committed by GitHub
parent 67106f0813
commit c61fc7419c
2 changed files with 103 additions and 1 deletions

View File

@@ -1,5 +1,6 @@
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
using System.Text.RegularExpressions;
using Umbraco.Cms.Core.Configuration.Models; using Umbraco.Cms.Core.Configuration.Models;
using Umbraco.Cms.Core.Models; using Umbraco.Cms.Core.Models;
using Umbraco.Cms.Core.Serialization; using Umbraco.Cms.Core.Serialization;
@@ -50,7 +51,7 @@ internal class RichTextPropertyIndexValueFactory : BlockValuePropertyIndexValueF
}; };
// the actual content (RTE content without markup, i.e. the actual words) must be indexed under the property alias // the actual content (RTE content without markup, i.e. the actual words) must be indexed under the property alias
var richTextWithoutMarkup = richTextEditorValue.Markup.StripHtml(); var richTextWithoutMarkup = StripHtmlForIndexing(richTextEditorValue.Markup);
if (richTextEditorValue.Blocks?.ContentData.Any() is not true) if (richTextEditorValue.Blocks?.ContentData.Any() is not true)
{ {
// no blocks; index the content for the culture and be done with it // no blocks; index the content for the culture and be done with it
@@ -132,4 +133,27 @@ internal class RichTextPropertyIndexValueFactory : BlockValuePropertyIndexValueF
protected override IEnumerable<RawDataItem> GetDataItems(RichTextEditorValue input, bool published) protected override IEnumerable<RawDataItem> GetDataItems(RichTextEditorValue input, bool published)
=> GetDataItems(input.Blocks?.ContentData ?? [], input.Blocks?.Expose ?? [], published); => GetDataItems(input.Blocks?.ContentData ?? [], input.Blocks?.Expose ?? [], published);
/// <summary>
/// Produces the plain-text form of rich text markup that is used for indexing.
/// Line break tags are swapped for a single space before the remaining markup is stripped,
/// so that words separated only by a &lt;br&gt; do not run together in the result.
/// </summary>
/// <param name="html">The rich text markup to convert.</param>
/// <returns>
/// An empty string when <paramref name="html"/> is null, empty or whitespace-only;
/// otherwise the stripped text with line breaks replaced by spaces.
/// </returns>
private static string StripHtmlForIndexing(string html)
{
    if (string.IsNullOrWhiteSpace(html) is false)
    {
        // Matches, case-insensitively, any tag named exactly "br" (the \b rejects longer names such as
        // <break>), including attributes, an optional self-closing slash and whitespace that follows the tag.
        var markupWithSpacedBreaks = Regex.Replace(html, @"<br\b[^>]*/?>\s*", " ", RegexOptions.IgnoreCase);

        // Everything other than line breaks is left to the StripHtml extension.
        return markupWithSpacedBreaks.StripHtml();
    }

    return string.Empty;
}
} }

View File

@@ -0,0 +1,78 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Moq;
using NUnit.Framework;
using Umbraco.Cms.Core.Configuration.Models;
using Umbraco.Cms.Core.Models;
using Umbraco.Cms.Core.PropertyEditors;
using Umbraco.Cms.Core.Serialization;
namespace Umbraco.Cms.Tests.UnitTests.Umbraco.Core.PropertyEditors;
/// <summary>
/// Unit tests verifying the plain-text index value that <see cref="RichTextPropertyIndexValueFactory"/>
/// produces for rich text markup.
/// </summary>
public class RichTextPropertyIndexValueFactoryTests
{
    /// <summary>
    /// Feeds markup through the factory as a property value and checks the text indexed under the property alias.
    /// </summary>
    /// <param name="testContent">The rich text markup returned as the property value.</param>
    /// <param name="expected">The text expected in the index, compared after trimming trailing whitespace.</param>
    [TestCase("<p>Sample text</p>", "Sample text")]
    [TestCase("<p>John Smith<br>Company ABC<br>London</p>", "John Smith Company ABC London")]
    [TestCase("<p>John Smith<break>Company ABC<break>London</p>", "John SmithCompany ABCLondon")]
    [TestCase("<p>John Smith<br>Company ABC<branything>London</p>", "John Smith Company ABCLondon")]
    [TestCase("<p>Another sample text with <strong>bold</strong> content</p>", "Another sample text with bold content")]
    [TestCase("<p>Text with <a href=\"https://example.com\">link</a></p>", "Text with link")]
    [TestCase("<p>Text with <img src=\"image.jpg\" alt=\"image\" /></p>", "Text with")]
    [TestCase("<p>Text with <span style=\"color: red;\">styled text</span></p>", "Text with styled text")]
    [TestCase("<p>Text with <em>emphasized</em> content</p>", "Text with emphasized content")]
    [TestCase("<p>Text with <u>underlined</u> content</p>", "Text with underlined content")]
    [TestCase("<p>Text with <code>inline code</code></p>", "Text with inline code")]
    [TestCase("<p>Text with <pre><code>code block</code></pre></p>", "Text with code block")]
    [TestCase("<p>Text with <blockquote>quoted text</blockquote></p>", "Text with quoted text")]
    [TestCase("<p>Text with <ul><li>list item 1</li><li>list item 2</li></ul></p>",
        "Text with list item 1list item 2")]
    [TestCase("<p>Text with <ol><li>ordered item 1</li><li>ordered item 2</li></ol></p>",
        "Text with ordered item 1ordered item 2")]
    [TestCase("<p>Text with <div class=\"class-name\">div content</div></p>", "Text with div content")]
    [TestCase("<p>Text with <span class=\"class-name\">span content</span></p>", "Text with span content")]
    [TestCase("<p>Text with <strong>bold</strong> and <em>italic</em> content</p>",
        "Text with bold and italic content")]
    [TestCase("<p>Text with <a href=\"https://example.com\" target=\"_blank\">external link</a></p>",
        "Text with external link")]
    [TestCase("<p>John Smith<br class=\"test\">Company ABC<br>London</p>", "John Smith Company ABC London")]
    [TestCase("<p>John Smith<br \r\n />Company ABC<br>London</p>", "John Smith Company ABC London")]
    public void Can_Create_Index_Values_From_RichText_Property(string testContent, string expected)
    {
        var propertyAlias = "richText";

        // stub property: exposes the alias and yields the markup under test for any GetValue arguments
        var richTextProperty = Mock.Of<IProperty>(p => p.Alias == propertyAlias
            && (string)p.GetValue(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<bool>()) == testContent);

        // the settings monitor is the only dependency that needs an explicit setup
        var settingsMonitor = Mock.Of<IOptionsMonitor<IndexingSettings>>();
        Mock.Get(settingsMonitor).Setup(x => x.CurrentValue).Returns(new IndexingSettings { });

        var sut = new RichTextPropertyIndexValueFactory(
            new PropertyEditorCollection(new DataEditorCollection(() => null)),
            Mock.Of<IJsonSerializer>(),
            settingsMonitor,
            Mock.Of<ILogger<RichTextPropertyIndexValueFactory>>());

        // pick out the index value stored under the property alias
        var contentTypes = new Dictionary<Guid, IContentType>();
        var indexValues = sut.GetIndexValues(richTextProperty, null, null, true, [], contentTypes);
        var aliasIndexValue = indexValues.FirstOrDefault(value => value.FieldName == propertyAlias);
        Assert.IsNotNull(aliasIndexValue);

        // exactly one string value is expected; trailing whitespace is tolerated, hence the TrimEnd below
        var actualText = aliasIndexValue.Values.SingleOrDefault() as string;
        Assert.IsNotNull(actualText);
        Assert.AreEqual(expected, actualText.TrimEnd());
    }
}