diff --git a/src/Umbraco.Infrastructure/PropertyEditors/RichTextPropertyIndexValueFactory.cs b/src/Umbraco.Infrastructure/PropertyEditors/RichTextPropertyIndexValueFactory.cs
index 0eb1ee257a..6013e6c4c2 100644
--- a/src/Umbraco.Infrastructure/PropertyEditors/RichTextPropertyIndexValueFactory.cs
+++ b/src/Umbraco.Infrastructure/PropertyEditors/RichTextPropertyIndexValueFactory.cs
@@ -1,5 +1,6 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
+using System.Text.RegularExpressions;
using Umbraco.Cms.Core.Configuration.Models;
using Umbraco.Cms.Core.Models;
using Umbraco.Cms.Core.Serialization;
@@ -50,7 +51,7 @@ internal class RichTextPropertyIndexValueFactory : BlockValuePropertyIndexValueF
};
// the actual content (RTE content without markup, i.e. the actual words) must be indexed under the property alias
- var richTextWithoutMarkup = richTextEditorValue.Markup.StripHtml();
+ var richTextWithoutMarkup = StripHtmlForIndexing(richTextEditorValue.Markup);
if (richTextEditorValue.Blocks?.ContentData.Any() is not true)
{
// no blocks; index the content for the culture and be done with it
@@ -132,4 +133,27 @@ internal class RichTextPropertyIndexValueFactory : BlockValuePropertyIndexValueF
protected override IEnumerable Sample text John Smith John Smith John Smith Another sample text with bold content Text with link Text with Text with styled text Text with emphasized content Text with underlined content Text with Text with
+ // Replace <br> and <br/> tags (with any amount of whitespace and attributes) with spaces
+ // This regex matches:
+ // - <br> (with / without spaces or attributes)
+ // - <br /> (with / without spaces or attributes)
+ html = Regex.Replace(html, @"
]*/?>\s*", " ", RegexOptions.IgnoreCase);
+
+ // Use the existing Microsoft StripHtml function for everything else
+ return html.StripHtml();
+ }
}
diff --git a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/PropertyEditors/RichTextPropertyIndexValueFactoryTests.cs b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/PropertyEditors/RichTextPropertyIndexValueFactoryTests.cs
new file mode 100644
index 0000000000..826395a64a
--- /dev/null
+++ b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/PropertyEditors/RichTextPropertyIndexValueFactoryTests.cs
@@ -0,0 +1,78 @@
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Options;
+using Moq;
+using NUnit.Framework;
+using Umbraco.Cms.Core.Configuration.Models;
+using Umbraco.Cms.Core.Models;
+using Umbraco.Cms.Core.PropertyEditors;
+using Umbraco.Cms.Core.Serialization;
+
+namespace Umbraco.Cms.Tests.UnitTests.Umbraco.Core.PropertyEditors;
+
+///
Company ABC
London
Company ABC
inline codecode block
Text with
quoted text", "Text with quoted text")] + [TestCase("
Text with
Text with
Text with
Text with span content
", "Text with span content")] + [TestCase("Text with bold and italic content
", + "Text with bold and italic content")] + [TestCase("Text with external link
", + "Text with external link")] + [TestCase("John Smith
Company ABC
London
John Smith
Company ABC
London