From af942b2558d11302c509038f11a7f110205b8b66 Mon Sep 17 00:00:00 2001 From: Paul Johnson Date: Mon, 15 Nov 2021 13:24:20 +0000 Subject: [PATCH] V9/bugfix/fix lucene immense raw fields prevent indexing (#11599) * Added failing test to demonstrate issue with large raw_ fields. * Switched to StoredField to avoid indexing error for immense fields. StringField indexes all the content as a single token and has a max length of 32766. StoredField does not analyze/index the field but enables retrieval with luceneSearcher.Doc(docId) Closes GH #11487 --- .../UmbracoExamineIndex.cs | 7 +--- .../UmbracoExamine/IndexTest.cs | 39 +++++++++++++++++++ .../Umbraco.Tests.Integration.csproj | 1 + 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/Umbraco.Examine.Lucene/UmbracoExamineIndex.cs b/src/Umbraco.Examine.Lucene/UmbracoExamineIndex.cs index 2f7cb646a2..5121c32d32 100644 --- a/src/Umbraco.Examine.Lucene/UmbracoExamineIndex.cs +++ b/src/Umbraco.Examine.Lucene/UmbracoExamineIndex.cs @@ -8,8 +8,6 @@ using Examine; using Examine.Lucene; using Examine.Lucene.Providers; using Lucene.Net.Documents; -using Lucene.Net.Index; -using Lucene.Net.Store; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using Umbraco.Cms.Core; @@ -103,10 +101,7 @@ namespace Umbraco.Cms.Infrastructure.Examine //remove the original value so we can store it the correct way d.RemoveField(f.Key); - d.Add(new StringField( - f.Key, - f.Value[0].ToString(), - Field.Store.YES)); + d.Add(new StoredField(f.Key, f.Value[0].ToString())); } } diff --git a/tests/Umbraco.Tests.Integration/Umbraco.Examine.Lucene/UmbracoExamine/IndexTest.cs b/tests/Umbraco.Tests.Integration/Umbraco.Examine.Lucene/UmbracoExamine/IndexTest.cs index f6362a8156..2974382890 100644 --- a/tests/Umbraco.Tests.Integration/Umbraco.Examine.Lucene/UmbracoExamine/IndexTest.cs +++ b/tests/Umbraco.Tests.Integration/Umbraco.Examine.Lucene/UmbracoExamine/IndexTest.cs @@ -1,7 +1,9 @@ using System; using 
System.Collections.Generic; using System.Linq; +using Bogus; using Examine; +using Lucene.Net.Util; using Newtonsoft.Json; using NUnit.Framework; using Umbraco.Cms.Core.Models; @@ -47,6 +49,43 @@ namespace Umbraco.Cms.Tests.Integration.Umbraco.Examine.Lucene.UmbracoExamine } } + [Test] + public void GivenIndexingDocument_WhenRichTextPropertyData_CanStoreImmenseFields() + { + using (GetSynchronousContentIndex(false, out UmbracoContentIndex index, out _, out ContentValueSetBuilder contentValueSetBuilder, null)) + { + index.CreateIndex(); + + ContentType contentType = ContentTypeBuilder.CreateBasicContentType(); + contentType.AddPropertyType(new PropertyType(TestHelper.ShortStringHelper, "test", ValueStorageType.Ntext) + { + Alias = "rte", + Name = "RichText", + PropertyEditorAlias = Cms.Core.Constants.PropertyEditors.Aliases.TinyMce + }); + + Content content = ContentBuilder.CreateBasicContent(contentType); + content.Id = 555; + content.Path = "-1,555"; + + var luceneStringFieldMaxLength = ByteBlockPool.BYTE_BLOCK_SIZE - 2; + var faker = new Faker(); + var immenseText = faker.Random.String(length: luceneStringFieldMaxLength + 10); + + content.Properties["rte"].SetValue(immenseText); + + IEnumerable<ValueSet> valueSet = contentValueSetBuilder.GetValueSets(content); + index.IndexItems(valueSet); + + ISearchResults results = index.Searcher.CreateQuery().Id(555).Execute(); + ISearchResult result = results.First(); + + var key = $"{UmbracoExamineFieldNames.RawFieldPrefix}rte"; + Assert.IsTrue(result.Values.ContainsKey(key)); + Assert.Greater(result.Values[key].Length, luceneStringFieldMaxLength); + } + } + [Test] public void GivenIndexingDocument_WhenGridPropertyData_ThenDataIndexedInSegregatedFields() { diff --git a/tests/Umbraco.Tests.Integration/Umbraco.Tests.Integration.csproj b/tests/Umbraco.Tests.Integration/Umbraco.Tests.Integration.csproj index c6d77a7627..a1fa8ae300 100644 --- a/tests/Umbraco.Tests.Integration/Umbraco.Tests.Integration.csproj +++
b/tests/Umbraco.Tests.Integration/Umbraco.Tests.Integration.csproj @@ -80,6 +80,7 @@ +