Files
Umbraco-CMS/src/Umbraco.Infrastructure/Examine/ContentIndexPopulator.cs
Shannon Deminick eba6373a12 Examine 2.0 integration (#10241)
* Init commit for examine 2.0 work, most old umb examine tests working, probably a lot that doesn't

* Gets Umbraco Examine tests passing and makes some sense out of them, fixes some underlying issues.

* Large refactor, remove TaskHelper, rename Notifications to be consistent, Gets all examine/lucene indexes building and startup ordered in the correct way, removes old files, creates new IUmbracoIndexingHandler for abstracting out all index operations for umbraco data, abstracts out IIndexRebuilder, Fixes Stack overflow with LiveModelsProvider and loading assemblies, ports some changes from v8 for startup handling with cold boots, refactors out LastSyncedFileManager

* fix up issues with rebuilding and management dashboard.

* removes old files, removes NetworkHelper, fixes LastSyncedFileManager implementation to ensure the machine name is used, fix up logging with cold boot state.

* Makes MainDom safer to use and makes PublishedSnapshotService lazily register with MainDom

* lazily acquire application id (fix unit tests)

* Fixes resource casing and missing test file

* Ensures caches when requiring internal services for PublishedSnapshotService, UseNuCache is a separate call, shouldn't be buried in AddWebComponents, was also causing issues in integration tests since nucache was being used for the Id2Key service.

* For UmbracoTestServerTestBase enable nucache services

* Fixing tests

* Fix another test

* Fixes tests, use TestHostingEnvironment, make Tests.Common use net5, remove old Lucene.Net.Contrib ref.

* Fixes up some review notes

* Fixes issue with doubly registering PublishedSnapshotService, meaning there could be 2x instances of it

* Checks for parseexception when executing the query

* Use application root instead of duplicating functionality.

* Added Examine project to netcore only solution file

* Fixed casing issue with LazyLoad, that is not lowercase.

* uses cancellationToken instead of bool flag, fixes always reading lastId from the LastSyncedFileManager, fixes RecurringHostedServiceBase so that there isn't an overlapping thread for the same task type

* Fix tests

* remove legacy test project from solution file

* Fix test

Co-authored-by: Bjarke Berg <mail@bergmania.dk>
2021-05-18 10:31:38 +02:00

176 lines
6.5 KiB
C#

using System;
using System.Collections.Generic;
using System.Linq;
using Examine;
using Microsoft.Extensions.Logging;
using Umbraco.Cms.Core;
using Umbraco.Cms.Core.Models;
using Umbraco.Cms.Core.Persistence.Querying;
using Umbraco.Cms.Core.Services;
using Umbraco.Cms.Infrastructure.Persistence;
namespace Umbraco.Cms.Infrastructure.Examine
{
/// <summary>
/// Performs the data lookups required to rebuild a content index
/// </summary>
/// <remarks>
/// Content is read page by page from <see cref="IContentService"/>, converted to value sets with the
/// configured <see cref="IValueSetBuilder{T}"/> and handed to every index passed to
/// <see cref="PopulateIndexes"/>.
/// </remarks>
public class ContentIndexPopulator : IndexPopulator<IUmbracoContentIndex>
{
    private readonly IContentService _contentService;
    private readonly IUmbracoDatabaseFactory _umbracoDatabaseFactory;
    private readonly IValueSetBuilder<IContent> _contentValueSetBuilder;

    /// <summary>
    /// The "published only" query. Its parameters never change, so it is built once on first use
    /// and then cached on this instance.
    /// </summary>
    private IQuery<IContent> _publishedQuery;

    // Built lazily because it needs the SQL context exposed by the database factory.
    private IQuery<IContent> PublishedQuery => _publishedQuery ??= _umbracoDatabaseFactory.SqlContext.Query<IContent>().Where(x => x.Published);

    private readonly bool _publishedValuesOnly;
    private readonly int? _parentId;
    private readonly ILogger<ContentIndexPopulator> _logger;

    /// <summary>
    /// Default constructor to lookup all content data
    /// </summary>
    /// <param name="logger">Logger used to report that there was nothing to populate.</param>
    /// <param name="contentService">Service used to page through the content to index.</param>
    /// <param name="umbracoDatabaseFactory">Provides the SQL context used to build the published-content query.</param>
    /// <param name="contentValueSetBuilder">Builds the value sets that are pushed to the indexes.</param>
    /// <exception cref="ArgumentNullException">Thrown when any argument is <c>null</c>.</exception>
    public ContentIndexPopulator(
        ILogger<ContentIndexPopulator> logger,
        IContentService contentService,
        IUmbracoDatabaseFactory umbracoDatabaseFactory,
        IContentValueSetBuilder contentValueSetBuilder)
        : this(logger, false, null, contentService, umbracoDatabaseFactory, contentValueSetBuilder)
    {
    }

    /// <summary>
    /// Optional constructor allowing specifying custom query parameters
    /// </summary>
    /// <param name="logger">Logger used to report that there was nothing to populate.</param>
    /// <param name="publishedValuesOnly">
    /// When <c>true</c> only published content is indexed; the populator also only registers for indexes
    /// whose <c>PublishedValuesOnly</c> flag has the same value.
    /// </param>
    /// <param name="parentId">
    /// Optional id of the content item whose descendants are indexed. <c>null</c> or a non-positive value
    /// falls back to <c>-1</c> as the start id.
    /// </param>
    /// <param name="contentService">Service used to page through the content to index.</param>
    /// <param name="umbracoDatabaseFactory">Provides the SQL context used to build the published-content query.</param>
    /// <param name="contentValueSetBuilder">Builds the value sets that are pushed to the indexes.</param>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="logger"/>, <paramref name="contentService"/>,
    /// <paramref name="umbracoDatabaseFactory"/> or <paramref name="contentValueSetBuilder"/> is <c>null</c>.
    /// </exception>
    public ContentIndexPopulator(
        ILogger<ContentIndexPopulator> logger,
        bool publishedValuesOnly,
        int? parentId,
        IContentService contentService,
        IUmbracoDatabaseFactory umbracoDatabaseFactory,
        IValueSetBuilder<IContent> contentValueSetBuilder)
    {
        _contentService = contentService ?? throw new ArgumentNullException(nameof(contentService));
        _umbracoDatabaseFactory = umbracoDatabaseFactory ?? throw new ArgumentNullException(nameof(umbracoDatabaseFactory));
        _contentValueSetBuilder = contentValueSetBuilder ?? throw new ArgumentNullException(nameof(contentValueSetBuilder));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _publishedValuesOnly = publishedValuesOnly;
        _parentId = parentId;
    }

    /// <summary>
    /// Determines whether this populator should populate the given index.
    /// </summary>
    /// <param name="index">The index to check.</param>
    /// <returns>
    /// <c>true</c> when the index's <c>PublishedValuesOnly</c> flag equals the flag this populator was created with.
    /// </returns>
    public override bool IsRegistered(IUmbracoContentIndex index)
    {
        // check if it should populate based on published values
        return _publishedValuesOnly == index.PublishedValuesOnly;
    }

    /// <summary>
    /// Populates the given indexes with either all content or published content only, depending on
    /// how this populator was constructed.
    /// </summary>
    /// <param name="indexes">The indexes to populate; nothing happens when the list is empty.</param>
    protected override void PopulateIndexes(IReadOnlyList<IIndex> indexes)
    {
        if (indexes.Count == 0)
        {
            // Constant message template (rendered text is unchanged) so the logger can cache it.
            _logger.LogDebug(
                "{MethodName} called with no indexes to populate. Typically means no index is registered with this populator.",
                nameof(PopulateIndexes));
            return;
        }

        const int pageSize = 10000;
        var pageIndex = 0;

        // -1 is used as the start id unless a positive parent id was configured.
        var contentParentId = -1;
        if (_parentId.HasValue && _parentId.Value > 0)
        {
            contentParentId = _parentId.Value;
        }

        if (_publishedValuesOnly)
        {
            IndexPublishedContent(contentParentId, pageIndex, pageSize, indexes);
        }
        else
        {
            IndexAllContent(contentParentId, pageIndex, pageSize, indexes);
        }
    }

    /// <summary>
    /// Indexes every descendant of <paramref name="contentParentId"/>, one page at a time.
    /// </summary>
    /// <param name="contentParentId">Id of the content item whose descendants are indexed.</param>
    /// <param name="pageIndex">Index of the first page to read.</param>
    /// <param name="pageSize">Number of items requested per page.</param>
    /// <param name="indexes">The indexes that receive the value sets.</param>
    protected void IndexAllContent(int contentParentId, int pageIndex, int pageSize, IReadOnlyList<IIndex> indexes)
    {
        IContent[] content;

        do
        {
            content = _contentService.GetPagedDescendants(contentParentId, pageIndex, pageSize, out _).ToArray();

            if (content.Length > 0)
            {
                // Materialise once so every index receives the same, already built, value sets.
                var valueSets = _contentValueSetBuilder.GetValueSets(content).ToList();

                foreach (IIndex index in indexes)
                {
                    index.IndexItems(valueSets);
                }
            }

            pageIndex++;
        }
        while (content.Length == pageSize); // a short page means the last page has been read
    }

    /// <summary>
    /// Indexes the published descendants of <paramref name="contentParentId"/>, one page at a time,
    /// skipping any item whose parent was not itself accepted for indexing.
    /// </summary>
    /// <param name="contentParentId">Id of the content item whose descendants are indexed.</param>
    /// <param name="pageIndex">Index of the first page to read.</param>
    /// <param name="pageSize">Number of items requested per page.</param>
    /// <param name="indexes">The indexes that receive the value sets.</param>
    protected void IndexPublishedContent(int contentParentId, int pageIndex, int pageSize,
        IReadOnlyList<IIndex> indexes)
    {
        IContent[] content;

        // Ids of items known to have a fully published path; shared across pages.
        var publishedPages = new HashSet<int>();

        // Descendants of a custom parent are never at level 1, so without a seed no item would ever
        // pass the "parent is published" check below and nothing would be indexed. Seed the set with
        // the parent itself, but only when the parent and all of its ancestors are published.
        if (contentParentId > 0)
        {
            IContent parent = _contentService.GetById(contentParentId);
            if (parent != null && _contentService.IsPathPublished(parent))
            {
                publishedPages.Add(parent.Id);
            }
        }

        do
        {
            //add the published filter
            //note: We will filter for published variants in the validator
            // Ordered by path; the parent check below relies on a parent having been seen before its children.
            content = _contentService.GetPagedDescendants(contentParentId, pageIndex, pageSize, out _, PublishedQuery,
                Ordering.By("Path", Direction.Ascending)).ToArray();

            if (content.Length > 0)
            {
                var indexableContent = new List<IContent>();

                foreach (IContent item in content)
                {
                    if (item.Level == 1)
                    {
                        // first level pages are always published so no need to filter them
                        indexableContent.Add(item);
                        publishedPages.Add(item.Id);
                    }
                    else if (publishedPages.Contains(item.ParentId))
                    {
                        // only index when parent is published
                        publishedPages.Add(item.Id);
                        indexableContent.Add(item);
                    }
                }

                var valueSets = _contentValueSetBuilder.GetValueSets(indexableContent.ToArray()).ToList();

                foreach (IIndex index in indexes)
                {
                    index.IndexItems(valueSets);
                }
            }

            pageIndex++;
        }
        while (content.Length == pageSize); // a short page means the last page has been read
    }
}
}