V9/bugfix/fix lucene immense raw fields prevent indexing (#11599)

* Added failing test to demonstrate issue with large raw_ fields.

* Switched to StoredField to avoid indexing error for immense fields.

StringField indexes the entire value as a single token, and Lucene rejects
any token longer than 32766 bytes.

StoredField does not analyze or index the field, but the value can still be
retrieved with luceneSearcher.Doc(docId).

Closes GH #11487
This commit is contained in:
Paul Johnson
2021-11-15 13:24:20 +00:00
committed by Bjarke Berg
parent f64894d37b
commit af942b2558
3 changed files with 41 additions and 6 deletions

View File

@@ -8,8 +8,6 @@ using Examine;
using Examine.Lucene;
using Examine.Lucene.Providers;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Umbraco.Cms.Core;
@@ -103,10 +101,7 @@ namespace Umbraco.Cms.Infrastructure.Examine
//remove the original value so we can store it the correct way
d.RemoveField(f.Key);
d.Add(new StringField(
f.Key,
f.Value[0].ToString(),
Field.Store.YES));
d.Add(new StoredField(f.Key, f.Value[0].ToString()));
}
}

View File

@@ -1,7 +1,9 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Bogus;
using Examine;
using Lucene.Net.Util;
using Newtonsoft.Json;
using NUnit.Framework;
using Umbraco.Cms.Core.Models;
@@ -47,6 +49,43 @@ namespace Umbraco.Cms.Tests.Integration.Umbraco.Examine.Lucene.UmbracoExamine
}
}
/// <summary>
/// Indexes a content item whose rich text property holds a value longer than
/// <c>ByteBlockPool.BYTE_BLOCK_SIZE - 2</c> characters and verifies that the
/// value can still be read back, in full length, from the raw field of the
/// search result.
/// </summary>
[Test]
public void GivenIndexingDocument_WhenRichTextPropertyData_CanStoreImmenseFields()
{
    using (GetSynchronousContentIndex(false, out UmbracoContentIndex index, out _, out ContentValueSetBuilder contentValueSetBuilder, null))
    {
        // Arrange
        index.CreateIndex();

        ContentType contentType = ContentTypeBuilder.CreateBasicContentType();
        contentType.AddPropertyType(new PropertyType(TestHelper.ShortStringHelper, "test", ValueStorageType.Ntext)
        {
            Alias = "rte",
            Name = "RichText",
            PropertyEditorAlias = Cms.Core.Constants.PropertyEditors.Aliases.TinyMce
        });

        Content content = ContentBuilder.CreateBasicContent(contentType);
        content.Id = 555;
        content.Path = "-1,555";

        // Threshold this test treats as the largest value a single-token string field
        // can hold; the generated text deliberately exceeds it by 10 characters.
        var luceneStringFieldMaxLength = ByteBlockPool.BYTE_BLOCK_SIZE - 2;
        var faker = new Faker();
        var immenseText = faker.Random.String(length: luceneStringFieldMaxLength + 10);
        content.Properties["rte"].SetValue(immenseText);

        // Act
        IEnumerable<ValueSet> valueSet = contentValueSetBuilder.GetValueSets(content);
        index.IndexItems(valueSet);
        ISearchResults results = index.Searcher.CreateQuery().Id(555).Execute();

        // Assert
        ISearchResult result = results.First();
        var key = $"{UmbracoExamineFieldNames.RawFieldPrefix}rte";

        // Single lookup instead of ContainsKey followed by the indexer.
        Assert.IsTrue(
            result.Values.TryGetValue(key, out string rawValue),
            $"Expected the search result to contain the field '{key}'.");
        Assert.Greater(
            rawValue.Length,
            luceneStringFieldMaxLength,
            $"Expected the stored value of '{key}' to keep its full length.");
    }
}
[Test]
public void GivenIndexingDocument_WhenGridPropertyData_ThenDataIndexedInSegregatedFields()
{

View File

@@ -80,6 +80,7 @@
</ItemGroup>
<ItemGroup>
<PackageReference Include="Bogus" Version="33.1.1" />
<PackageReference Include="Examine.Lucene" Version="2.0.1" />
<PackageReference Include="Microsoft.AspNet.WebApi.Client" Version="5.2.7" />
<PackageReference Include="Microsoft.AspNetCore.Mvc.Testing" Version="5.0.11" />