From 4f7de35e0445686e3c9deb971dddecce7f7c1ac1 Mon Sep 17 00:00:00 2001 From: Shannon Date: Mon, 26 Nov 2018 12:06:38 +1100 Subject: [PATCH] Adds HtmlValueType --- src/Umbraco.Core/StringExtensions.cs | 2 +- src/Umbraco.Examine/HtmlValueType.cs | 43 ++++++++++++++++++++ src/Umbraco.Examine/Umbraco.Examine.csproj | 3 +- src/Umbraco.Examine/UmbracoExamineIndexer.cs | 33 --------------- src/Umbraco.Tests/Umbraco.Tests.csproj | 2 +- src/Umbraco.Web.UI/Umbraco.Web.UI.csproj | 2 +- src/Umbraco.Web/Umbraco.Web.csproj | 2 +- 7 files changed, 49 insertions(+), 38 deletions(-) create mode 100644 src/Umbraco.Examine/HtmlValueType.cs diff --git a/src/Umbraco.Core/StringExtensions.cs b/src/Umbraco.Core/StringExtensions.cs index 9c686c4353..03a371204c 100644 --- a/src/Umbraco.Core/StringExtensions.cs +++ b/src/Umbraco.Core/StringExtensions.cs @@ -540,7 +540,7 @@ namespace Umbraco.Core public static string StripHtml(this string text) { const string pattern = @"<(.|\n)*?>"; - return Regex.Replace(text, pattern, String.Empty); + return Regex.Replace(text, pattern, string.Empty, RegexOptions.Compiled); } /// diff --git a/src/Umbraco.Examine/HtmlValueType.cs b/src/Umbraco.Examine/HtmlValueType.cs new file mode 100644 index 0000000000..f55023f053 --- /dev/null +++ b/src/Umbraco.Examine/HtmlValueType.cs @@ -0,0 +1,43 @@ +using Lucene.Net.Documents; +using Umbraco.Core; +using Examine.LuceneEngine.Indexing; +using Umbraco.Core.Xml; + +namespace Umbraco.Examine +{ + /// + /// Strips HTML symbols from the text + /// + public class HtmlValueType : FullTextType + { + private readonly bool _storeRawValue; + + public HtmlValueType(string fieldName, bool storeRawValue) : base(fieldName, false) + { + _storeRawValue = storeRawValue; + } + + protected override void AddSingleValue(Document doc, object value) + { + if (TryConvert(value, out var str)) + { + if (XmlHelper.CouldItBeXml(str)) + { + base.AddSingleValue(doc, str.StripHtml()); + + if (_storeRawValue) + { + doc.Add(new Field(UmbracoExamineIndexer.RawFieldPrefix + FieldName, str, + Field.Store.YES, + Field.Index.NO, + Field.TermVector.NO)); + } + } + else + base.AddSingleValue(doc, str); + } + else + base.AddSingleValue(doc, str); + } + } +} diff --git a/src/Umbraco.Examine/Umbraco.Examine.csproj b/src/Umbraco.Examine/Umbraco.Examine.csproj index db3570d380..8065cc799c 100644 --- a/src/Umbraco.Examine/Umbraco.Examine.csproj +++ b/src/Umbraco.Examine/Umbraco.Examine.csproj @@ -48,7 +48,7 @@ - + @@ -59,6 +59,7 @@ + diff --git a/src/Umbraco.Examine/UmbracoExamineIndexer.cs b/src/Umbraco.Examine/UmbracoExamineIndexer.cs index fd56ac7fc4..1fb3b0c3a3 100644 --- a/src/Umbraco.Examine/UmbracoExamineIndexer.cs +++ b/src/Umbraco.Examine/UmbracoExamineIndexer.cs @@ -19,39 +19,6 @@ using Directory = Lucene.Net.Store.Directory; namespace Umbraco.Examine { - public class HtmlValueType : IndexValueTypeBase - { - public HtmlValueType(string fieldName, bool store = true) : base(fieldName, store) - { - } - - protected override void AddSingleValue(Document doc, object value) - { - //TODO: Make this happen so we can properly analyze/tokenize html, maybe we only need an analyzer though - - throw new NotImplementedException(); - } - } - - public class HtmlAnalyzer : Analyzer - { - public override TokenStream TokenStream(string fieldName, TextReader reader) - { - return new LowerCaseFilter( //case insensitive - new EmailAddressTokenizer(reader)); //email tokenizer - } - - /// - /// Used for email addresses - /// - public class HtmlTokenizer : Tokenizer - { - public override bool IncrementToken() - { - throw new NotImplementedException(); - } - } - } /// /// An abstract provider containing the basic functionality to be able to query against diff --git a/src/Umbraco.Tests/Umbraco.Tests.csproj b/src/Umbraco.Tests/Umbraco.Tests.csproj index 84ddda4335..c68be3a26d 100644 --- a/src/Umbraco.Tests/Umbraco.Tests.csproj +++ b/src/Umbraco.Tests/Umbraco.Tests.csproj @@ -77,7 +77,7 @@ - + 1.8.9 diff --git a/src/Umbraco.Web.UI/Umbraco.Web.UI.csproj b/src/Umbraco.Web.UI/Umbraco.Web.UI.csproj index 90543b6faf..18d66a4111 100644 --- a/src/Umbraco.Web.UI/Umbraco.Web.UI.csproj +++ b/src/Umbraco.Web.UI/Umbraco.Web.UI.csproj @@ -88,7 +88,7 @@ - + diff --git a/src/Umbraco.Web/Umbraco.Web.csproj b/src/Umbraco.Web/Umbraco.Web.csproj index 9c98f81894..2f8613df73 100755 --- a/src/Umbraco.Web/Umbraco.Web.csproj +++ b/src/Umbraco.Web/Umbraco.Web.csproj @@ -62,7 +62,7 @@ - + 2.6.2.25