From 6f30029a4997801f0f3672ab0c02505d4ee016df Mon Sep 17 00:00:00 2001 From: Dave Woestenborghs Date: Thu, 1 Oct 2020 09:57:18 +0200 Subject: [PATCH 1/9] #8879 split indexing in 2 seperate methods so we are able to have different logic for all and published only content --- src/Umbraco.Examine/ContentIndexPopulator.cs | 46 ++++++++++++++++---- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/src/Umbraco.Examine/ContentIndexPopulator.cs b/src/Umbraco.Examine/ContentIndexPopulator.cs index 99ff4d7f87..7f712c48e2 100644 --- a/src/Umbraco.Examine/ContentIndexPopulator.cs +++ b/src/Umbraco.Examine/ContentIndexPopulator.cs @@ -70,22 +70,48 @@ namespace Umbraco.Examine { contentParentId = _parentId.Value; } + + if (_publishedValuesOnly) + { + IndexPublishedContent(contentParentId, pageIndex, pageSize, indexes); + } + else + { + IndexAllContent(contentParentId, pageIndex, pageSize, indexes); + } + } + + protected void IndexAllContent(int contentParentId, int pageIndex, int pageSize, IReadOnlyList indexes) + { IContent[] content; do { - if (!_publishedValuesOnly) + content = _contentService.GetPagedDescendants(contentParentId, pageIndex, pageSize, out _).ToArray(); + + if (content.Length > 0) { - content = _contentService.GetPagedDescendants(contentParentId, pageIndex, pageSize, out _).ToArray(); - } - else - { - //add the published filter - //note: We will filter for published variants in the validator - content = _contentService.GetPagedDescendants(contentParentId, pageIndex, pageSize, out _, - _publishedQuery, Ordering.By("Path", Direction.Ascending)).ToArray(); + // ReSharper disable once PossibleMultipleEnumeration + foreach (var index in indexes) + index.IndexItems(_contentValueSetBuilder.GetValueSets(content)); } + pageIndex++; + } while (content.Length == pageSize); + } + + protected void IndexPublishedContent(int contentParentId, int pageIndex, int pageSize, + IReadOnlyList indexes) + { + IContent[] content; + + do + { + //add the published filter + //note: We will filter for published variants in the validator + content = _contentService.GetPagedDescendants(contentParentId, pageIndex, pageSize, out _, + _publishedQuery, Ordering.By("Path", Direction.Ascending)).ToArray(); + if (content.Length > 0) { // ReSharper disable once PossibleMultipleEnumeration @@ -97,4 +123,6 @@ namespace Umbraco.Examine } while (content.Length == pageSize); } } + + } From df2c90e488ba3ae28b4779543dca6f0dc89f3ff2 Mon Sep 17 00:00:00 2001 From: Dave Woestenborghs Date: Thu, 1 Oct 2020 10:16:08 +0200 Subject: [PATCH 2/9] For published content index items by getting children recursively --- src/Umbraco.Examine/ContentIndexPopulator.cs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Umbraco.Examine/ContentIndexPopulator.cs b/src/Umbraco.Examine/ContentIndexPopulator.cs index 7f712c48e2..54c872d550 100644 --- a/src/Umbraco.Examine/ContentIndexPopulator.cs +++ b/src/Umbraco.Examine/ContentIndexPopulator.cs @@ -109,14 +109,20 @@ namespace Umbraco.Examine { //add the published filter //note: We will filter for published variants in the validator - content = _contentService.GetPagedDescendants(contentParentId, pageIndex, pageSize, out _, - _publishedQuery, Ordering.By("Path", Direction.Ascending)).ToArray(); - + content = _contentService.GetPagedChildren(contentParentId, pageIndex, pageSize, out _, _publishedQuery, + Ordering.By("Path", Direction.Ascending)).ToArray(); + if (content.Length > 0) { // ReSharper disable once PossibleMultipleEnumeration foreach (var index in indexes) index.IndexItems(_contentValueSetBuilder.GetValueSets(content)); + + + foreach (var c in content) + { + IndexPublishedContent(c.Id,0, 10000, indexes); + } } pageIndex++; From 257392dd28d55c2414d658be6add052647e6fe80 Mon Sep 17 00:00:00 2001 From: Shannon Date: Mon, 12 Oct 2020 16:49:45 +1100 Subject: [PATCH 3/9] Non-published content indexed in ExternalIndex Fixes issue that the non-published content index populator is executing for the ExternalIndex --- src/Umbraco.Examine/ContentIndexPopulator.cs | 8 +++++++- src/Umbraco.Examine/IUmbracoContentIndex.cs | 14 ++++++++++++++ src/Umbraco.Examine/IndexPopulator.cs | 11 +++++++++-- src/Umbraco.Examine/UmbracoContentIndex.cs | 2 +- 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/Umbraco.Examine/ContentIndexPopulator.cs b/src/Umbraco.Examine/ContentIndexPopulator.cs index 99ff4d7f87..af30b56038 100644 --- a/src/Umbraco.Examine/ContentIndexPopulator.cs +++ b/src/Umbraco.Examine/ContentIndexPopulator.cs @@ -15,7 +15,7 @@ namespace Umbraco.Examine /// /// Performs the data lookups required to rebuild a content index /// - public class ContentIndexPopulator : IndexPopulator + public class ContentIndexPopulator : IndexPopulator { private readonly IContentService _contentService; private readonly IValueSetBuilder _contentValueSetBuilder; @@ -58,6 +58,12 @@ namespace Umbraco.Examine _parentId = parentId; } + public override bool IsRegistered(IUmbracoContentIndex2 index) + { + // check if it should populate based on published values + return _publishedValuesOnly == index.PublishedValuesOnly; + } + protected override void PopulateIndexes(IReadOnlyList indexes) { if (indexes.Count == 0) return; diff --git a/src/Umbraco.Examine/IUmbracoContentIndex.cs b/src/Umbraco.Examine/IUmbracoContentIndex.cs index 3181ff663e..b718ec6bce 100644 --- a/src/Umbraco.Examine/IUmbracoContentIndex.cs +++ b/src/Umbraco.Examine/IUmbracoContentIndex.cs @@ -2,6 +2,20 @@ using Examine; namespace Umbraco.Examine { + /// + /// Marker interface for indexes of Umbraco content + /// + /// + /// This is a backwards compat change, in next major version remove the need for this and just have a single interface + /// + public interface IUmbracoContentIndex2 : IUmbracoContentIndex + { + bool PublishedValuesOnly { get; } + } + + /// + /// Marker interface for indexes of Umbraco content + /// public interface IUmbracoContentIndex : IIndex { diff --git a/src/Umbraco.Examine/IndexPopulator.cs b/src/Umbraco.Examine/IndexPopulator.cs index f9d4d85dc8..bfd757f9be 100644 --- a/src/Umbraco.Examine/IndexPopulator.cs +++ b/src/Umbraco.Examine/IndexPopulator.cs @@ -13,9 +13,16 @@ namespace Umbraco.Examine { public override bool IsRegistered(IIndex index) { - if (base.IsRegistered(index)) return true; - return index is TIndex; + if (base.IsRegistered(index)) + return true; + + if (!(index is TIndex casted)) + return false; + + return IsRegistered(casted); } + + public virtual bool IsRegistered(TIndex index) => true; } public abstract class IndexPopulator : IIndexPopulator diff --git a/src/Umbraco.Examine/UmbracoContentIndex.cs b/src/Umbraco.Examine/UmbracoContentIndex.cs index 67e430b4e9..88033b1407 100644 --- a/src/Umbraco.Examine/UmbracoContentIndex.cs +++ b/src/Umbraco.Examine/UmbracoContentIndex.cs @@ -17,7 +17,7 @@ namespace Umbraco.Examine /// /// An indexer for Umbraco content and media /// - public class UmbracoContentIndex : UmbracoExamineIndex, IUmbracoContentIndex + public class UmbracoContentIndex : UmbracoExamineIndex, IUmbracoContentIndex2 { public const string VariesByCultureFieldName = SpecialFieldPrefix + "VariesByCulture"; protected ILocalizationService LanguageService { get; } From af5d2851b09322ba714585b176e289edd05b13f7 Mon Sep 17 00:00:00 2001 From: Dave Woestenborghs Date: Mon, 12 Oct 2020 15:59:35 +0200 Subject: [PATCH 4/9] Enable ordering by level when getting page descendants of content --- .../Repositories/Implement/ContentRepositoryBase.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs b/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs index 845006891d..c79646c56f 100644 --- a/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs +++ b/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs @@ -363,6 +363,9 @@ namespace Umbraco.Core.Persistence.Repositories.Implement if (ordering.OrderBy.InvariantEquals("path")) return GetAliasedField(SqlSyntax.GetFieldName(x => x.Path), sql); + if (ordering.OrderBy.InvariantEquals("level")) + return GetAliasedField(SqlSyntax.GetFieldName(x => x.Level), sql); + // note: 'owner' is the user who created the item as a whole, // we don't have an 'owner' per culture (should we?) if (ordering.OrderBy.InvariantEquals("owner")) From 3da74b74e511879c16eece56a1999d1704f2ad8d Mon Sep 17 00:00:00 2001 From: Dave Woestenborghs Date: Mon, 12 Oct 2020 16:13:51 +0200 Subject: [PATCH 5/9] Optimized query for getting published pages only --- src/Umbraco.Examine/ContentIndexPopulator.cs | 42 +++++++++++++++----- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/src/Umbraco.Examine/ContentIndexPopulator.cs b/src/Umbraco.Examine/ContentIndexPopulator.cs index 54c872d550..15b3a6f526 100644 --- a/src/Umbraco.Examine/ContentIndexPopulator.cs +++ b/src/Umbraco.Examine/ContentIndexPopulator.cs @@ -4,6 +4,7 @@ using System.Linq; using Examine; using Umbraco.Core; using Umbraco.Core.Models; +using Umbraco.Core.Models.Blocks; using Umbraco.Core.Services; using Umbraco.Core.Persistence; using Umbraco.Core.Persistence.DatabaseModelDefinitions; @@ -105,24 +106,47 @@ namespace Umbraco.Examine { IContent[] content; + var publishedPages = new HashSet(); + do { //add the published filter //note: We will filter for published variants in the validator - content = _contentService.GetPagedChildren(contentParentId, pageIndex, pageSize, out _, _publishedQuery, - Ordering.By("Path", Direction.Ascending)).ToArray(); + content = _contentService.GetPagedDescendants(contentParentId, pageIndex, pageSize, out _, _publishedQuery, + Ordering.By("Level", Direction.Ascending)).ToArray(); if (content.Length > 0) { + var indexableContent = new List(); + + // get the max level in this result set + int maxLevel = content.Max(x => x.Level); + + // gets the first level pages, these are always published because _publishedQuery filter + // and store the id in a hash set so we can track them + var firstLevelContent = content.Where(x => x.Level == 1); + + foreach (var item in firstLevelContent) + { + publishedPages.Add(item.Id); + indexableContent.Add(item); + } + + // get content per level so we can filter the pages that don't have a published parent + for (var level = 2; level <= maxLevel; level++) + { + var levelContent = content.Where(x => x.Level == level && publishedPages.Contains(x.ParentId)); + + foreach (var item in levelContent) + { + publishedPages.Add(item.Id); + indexableContent.Add(item); + } + } + // ReSharper disable once PossibleMultipleEnumeration foreach (var index in indexes) - index.IndexItems(_contentValueSetBuilder.GetValueSets(content)); - - - foreach (var c in content) - { - IndexPublishedContent(c.Id,0, 10000, indexes); - } + index.IndexItems(_contentValueSetBuilder.GetValueSets(indexableContent.ToArray())); } pageIndex++; From 4db30c24c825c57b3c2d7c0d45ccbdb7d62947e1 Mon Sep 17 00:00:00 2001 From: Dave Woestenborghs Date: Mon, 12 Oct 2020 16:17:38 +0200 Subject: [PATCH 6/9] Added some more comments --- src/Umbraco.Examine/ContentIndexPopulator.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Umbraco.Examine/ContentIndexPopulator.cs b/src/Umbraco.Examine/ContentIndexPopulator.cs index e9de19d137..8a137251a4 100644 --- a/src/Umbraco.Examine/ContentIndexPopulator.cs +++ b/src/Umbraco.Examine/ContentIndexPopulator.cs @@ -139,6 +139,7 @@ namespace Umbraco.Examine } // get content per level so we can filter the pages that don't have a published parent + // this because the paged descendants method does not take unpublished ancestors into account for (var level = 2; level <= maxLevel; level++) { var levelContent = content.Where(x => x.Level == level && publishedPages.Contains(x.ParentId)); From fc9f80c30d8b8d3398ab8a22f102f1e67b4789fb Mon Sep 17 00:00:00 2001 From: Dave Woestenborghs Date: Mon, 12 Oct 2020 16:55:16 +0200 Subject: [PATCH 7/9] Fix broken unit tests --- src/Umbraco.Tests/UmbracoExamine/IndexInitializer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Umbraco.Tests/UmbracoExamine/IndexInitializer.cs b/src/Umbraco.Tests/UmbracoExamine/IndexInitializer.cs index e9f18d8947..1a288b423c 100644 --- a/src/Umbraco.Tests/UmbracoExamine/IndexInitializer.cs +++ b/src/Umbraco.Tests/UmbracoExamine/IndexInitializer.cs @@ -45,7 +45,7 @@ namespace Umbraco.Tests.UmbracoExamine public static ContentIndexPopulator GetContentIndexRebuilder(PropertyEditorCollection propertyEditors, IContentService contentService, IScopeProvider scopeProvider, bool publishedValuesOnly) { var contentValueSetBuilder = GetContentValueSetBuilder(propertyEditors, scopeProvider, publishedValuesOnly); - var contentIndexDataSource = new ContentIndexPopulator(true, null, contentService, scopeProvider.SqlContext, contentValueSetBuilder); + var contentIndexDataSource = new ContentIndexPopulator(publishedValuesOnly, null, contentService, scopeProvider.SqlContext, contentValueSetBuilder); return contentIndexDataSource; } From 165f99c920482335e60d4a1a3babc54cc19bdb33 Mon Sep 17 00:00:00 2001 From: Dave Woestenborghs Date: Wed, 14 Oct 2020 08:25:42 +0200 Subject: [PATCH 8/9] Order by path instead of level --- .../Repositories/Implement/ContentRepositoryBase.cs | 3 --- src/Umbraco.Examine/ContentIndexPopulator.cs | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs b/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs index c79646c56f..845006891d 100644 --- a/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs +++ b/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs @@ -363,9 +363,6 @@ namespace Umbraco.Core.Persistence.Repositories.Implement if (ordering.OrderBy.InvariantEquals("path")) return GetAliasedField(SqlSyntax.GetFieldName(x => x.Path), sql); - if (ordering.OrderBy.InvariantEquals("level")) - return GetAliasedField(SqlSyntax.GetFieldName(x => x.Level), sql); - // note: 'owner' is the user who created the item as a whole, // we don't have an 'owner' per culture (should we?) if (ordering.OrderBy.InvariantEquals("owner")) diff --git a/src/Umbraco.Examine/ContentIndexPopulator.cs b/src/Umbraco.Examine/ContentIndexPopulator.cs index 8a137251a4..4f2c005ce4 100644 --- a/src/Umbraco.Examine/ContentIndexPopulator.cs +++ b/src/Umbraco.Examine/ContentIndexPopulator.cs @@ -119,7 +119,7 @@ namespace Umbraco.Examine //add the published filter //note: We will filter for published variants in the validator content = _contentService.GetPagedDescendants(contentParentId, pageIndex, pageSize, out _, _publishedQuery, - Ordering.By("Level", Direction.Ascending)).ToArray(); + Ordering.By("Path", Direction.Ascending)).ToArray(); if (content.Length > 0) { From 29e2b217893a5c3ee3a9ab2294f203505e7bc3b3 Mon Sep 17 00:00:00 2001 From: Dave Woestenborghs Date: Wed, 14 Oct 2020 08:37:49 +0200 Subject: [PATCH 9/9] Refactored to a single loop to avoid multiple enumerations on the results list --- src/Umbraco.Examine/ContentIndexPopulator.cs | 36 ++++++++------------ 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/src/Umbraco.Examine/ContentIndexPopulator.cs b/src/Umbraco.Examine/ContentIndexPopulator.cs index 4f2c005ce4..01a7046723 100644 --- a/src/Umbraco.Examine/ContentIndexPopulator.cs +++ b/src/Umbraco.Examine/ContentIndexPopulator.cs @@ -120,34 +120,28 @@ namespace Umbraco.Examine //note: We will filter for published variants in the validator content = _contentService.GetPagedDescendants(contentParentId, pageIndex, pageSize, out _, _publishedQuery, Ordering.By("Path", Direction.Ascending)).ToArray(); - + + if (content.Length > 0) { var indexableContent = new List(); - // get the max level in this result set - int maxLevel = content.Max(x => x.Level); - - // gets the first level pages, these are always published because _publishedQuery filter - // and store the id in a hash set so we can track them - var firstLevelContent = content.Where(x => x.Level == 1); - - foreach (var item in firstLevelContent) + foreach (var item in content) { - publishedPages.Add(item.Id); - indexableContent.Add(item); - } - - // get content per level so we can filter the pages that don't have a published parent - // this because the paged descendants method does not take unpublished ancestors into account - for (var level = 2; level <= maxLevel; level++) - { - var levelContent = content.Where(x => x.Level == level && publishedPages.Contains(x.ParentId)); - - foreach (var item in levelContent) + if (item.Level == 1) { - publishedPages.Add(item.Id); + // first level pages are always published so no need to filter them indexableContent.Add(item); + publishedPages.Add(item.Id); + } + else + { + if (publishedPages.Contains(item.ParentId)) + { + // only index when parent is published + publishedPages.Add(item.Id); + indexableContent.Add(item); + } } }