From 5d8bfcea979d1582af1dc3f7ed973107b16368e5 Mon Sep 17 00:00:00 2001 From: Shannon Date: Fri, 17 Apr 2020 00:47:26 +1000 Subject: [PATCH 1/3] Ensure when we are loading in ALL data that we page over the data as to not cause an SQL timeout --- .../Persistence/NPocoDatabaseExtensions.cs | 42 +++++++++++++ .../NuCache/DataSource/DatabaseDataSource.cs | 63 ++++++++++++------- 2 files changed, 82 insertions(+), 23 deletions(-) diff --git a/src/Umbraco.Core/Persistence/NPocoDatabaseExtensions.cs b/src/Umbraco.Core/Persistence/NPocoDatabaseExtensions.cs index acfa51f895..152dcbe6d3 100644 --- a/src/Umbraco.Core/Persistence/NPocoDatabaseExtensions.cs +++ b/src/Umbraco.Core/Persistence/NPocoDatabaseExtensions.cs @@ -18,6 +18,48 @@ namespace Umbraco.Core.Persistence /// public static partial class NPocoDatabaseExtensions { + /// + /// Iterates over the result of a paged data set with a db reader + /// + /// + /// + /// + /// The number of rows to load per page + /// + /// + /// + /// + /// NPoco's normal Page returns a List{T} but sometimes we don't want all that in memory and instead want to + /// iterate over each row with a reader using Query vs Fetch. + /// + internal static IEnumerable QueryPaged(this IDatabase database, long pageSize, Sql sql) + { + var sqlString = sql.SQL; + var sqlArgs = sql.Arguments; + + int? itemCount = null; + long pageIndex = 0; + do + { + // Get the paged queries + database.BuildPageQueries(pageIndex * pageSize, pageSize, sqlString, ref sqlArgs, out var sqlCount, out var sqlPage); + + // get the item count once + if (itemCount == null) + { + itemCount = database.ExecuteScalar(sqlCount, sqlArgs); + } + pageIndex++; + + // iterate over rows without allocating all items to memory (Query vs Fetch) + foreach (var row in database.Query(sqlPage, sqlArgs)) + { + yield return row; + } + + } while ((pageIndex * pageSize) < itemCount); + } + // NOTE // // proper way to do it with TSQL and SQLCE diff --git a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/DatabaseDataSource.cs b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/DatabaseDataSource.cs index 19aab7ea65..694dac04df 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/DatabaseDataSource.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/DatabaseDataSource.cs @@ -20,6 +20,8 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource // provides efficient database access for NuCache internal class DatabaseDataSource : IDataSource { + private const int PageSize = 500; + // we want arrays, we want them all loaded, not an enumerable private Sql ContentSourcesSelect(IScope scope, Func, Sql> joins = null) @@ -79,33 +81,43 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - return scope.Database.Query(sql).Select(CreateContentNodeKit); + // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. + // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. + + foreach (var row in scope.Database.QueryPaged(PageSize, sql)) + yield return CreateContentNodeKit(row); } public IEnumerable GetBranchContentSources(IScope scope, int id) { var syntax = scope.SqlContext.SqlSyntax; - var sql = ContentSourcesSelect(scope, s => s + var sql = ContentSourcesSelect(scope, + s => s.InnerJoin("x").On((left, right) => left.NodeId == right.NodeId || SqlText(left.Path, right.Path, (lp, rp) => $"({lp} LIKE {syntax.GetConcat(rp, "',%'")})"), aliasRight: "x")) + .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) + .Where(x => x.NodeId == id, "x") + .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - .InnerJoin("x").On((left, right) => left.NodeId == right.NodeId || SqlText(left.Path, right.Path, (lp, rp) => $"({lp} LIKE {syntax.GetConcat(rp, "',%'")})"), aliasRight: "x")) + // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. + // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. - .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) - .Where(x => x.NodeId == id, "x") - .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - - return scope.Database.Query(sql).Select(CreateContentNodeKit); + foreach (var row in scope.Database.QueryPaged(PageSize, sql)) + yield return CreateContentNodeKit(row); } public IEnumerable GetTypeContentSources(IScope scope, IEnumerable ids) { - if (!ids.Any()) return Enumerable.Empty(); + if (!ids.Any()) yield break; var sql = ContentSourcesSelect(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) .WhereIn(x => x.ContentTypeId, ids) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - return scope.Database.Query(sql).Select(CreateContentNodeKit); + // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. + // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. + + foreach (var row in scope.Database.QueryPaged(PageSize, sql)) + yield return CreateContentNodeKit(row); } private Sql MediaSourcesSelect(IScope scope, Func, Sql> joins = null) @@ -116,11 +128,8 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource x => Alias(x.Level, "Level"), x => Alias(x.Path, "Path"), x => Alias(x.SortOrder, "SortOrder"), x => Alias(x.ParentId, "ParentId"), x => Alias(x.CreateDate, "CreateDate"), x => Alias(x.UserId, "CreatorId")) .AndSelect(x => Alias(x.ContentTypeId, "ContentTypeId")) - .AndSelect(x => Alias(x.Id, "VersionId"), x => Alias(x.Text, "EditName"), x => Alias(x.VersionDate, "EditVersionDate"), x => Alias(x.UserId, "EditWriterId")) - .AndSelect("nuEdit", x => Alias(x.Data, "EditData")) - .From(); if (joins != null) @@ -128,9 +137,7 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource sql = sql .InnerJoin().On((left, right) => left.NodeId == right.NodeId) - .InnerJoin().On((left, right) => left.NodeId == right.NodeId && right.Current) - .LeftJoin("nuEdit").On((left, right) => left.NodeId == right.NodeId && !right.Published, aliasRight: "nuEdit"); return sql; @@ -152,33 +159,43 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && !x.Trashed) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - return scope.Database.Query(sql).Select(CreateMediaNodeKit); + // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. + // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. + + foreach (var row in scope.Database.QueryPaged(PageSize, sql)) + yield return CreateMediaNodeKit(row); } public IEnumerable GetBranchMediaSources(IScope scope, int id) { var syntax = scope.SqlContext.SqlSyntax; - var sql = MediaSourcesSelect(scope, s => s - - .InnerJoin("x").On((left, right) => left.NodeId == right.NodeId || SqlText(left.Path, right.Path, (lp, rp) => $"({lp} LIKE {syntax.GetConcat(rp, "',%'")})"), aliasRight: "x")) - + var sql = MediaSourcesSelect(scope, + s => s.InnerJoin("x").On((left, right) => left.NodeId == right.NodeId || SqlText(left.Path, right.Path, (lp, rp) => $"({lp} LIKE {syntax.GetConcat(rp, "',%'")})"), aliasRight: "x")) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && !x.Trashed) .Where(x => x.NodeId == id, "x") .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - return scope.Database.Query(sql).Select(CreateMediaNodeKit); + // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. + // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. + + foreach (var row in scope.Database.QueryPaged(PageSize, sql)) + yield return CreateMediaNodeKit(row); } public IEnumerable GetTypeMediaSources(IScope scope, IEnumerable ids) { - if (!ids.Any()) return Enumerable.Empty(); + if (!ids.Any()) yield break; var sql = MediaSourcesSelect(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && !x.Trashed) .WhereIn(x => x.ContentTypeId, ids) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - return scope.Database.Query(sql).Select(CreateMediaNodeKit); + // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. + // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. + + foreach (var row in scope.Database.QueryPaged(PageSize, sql)) + yield return CreateMediaNodeKit(row); } private static ContentNodeKit CreateContentNodeKit(ContentSourceDto dto) From 2509dc3749dac1ce44071f59a0ea2a8a21f27024 Mon Sep 17 00:00:00 2001 From: Shannon Date: Fri, 17 Apr 2020 14:46:45 +1000 Subject: [PATCH 2/3] runs in parallel the call to rebuild the in-memory cache from the db sources when in pure live mode and content types change --- .../NuCache/PublishedSnapshotService.cs | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs index a33d9ee427..4e630cad3d 100755 --- a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs @@ -5,6 +5,7 @@ using System.Globalization; using System.IO; using System.Linq; using System.Threading; +using System.Threading.Tasks; using CSharpTest.Net.Collections; using Newtonsoft.Json; using Umbraco.Core; @@ -866,12 +867,24 @@ namespace Umbraco.Web.PublishedCache.NuCache //into a new DLL for the application which includes both content types and media types. //Since the models in the cache are based on these actual classes, all of the objects in the cache need to be updated //to use the newest version of the class. - using (_contentStore.GetScopedWriteLock(_scopeProvider)) - using (_mediaStore.GetScopedWriteLock(_scopeProvider)) - { - NotifyLocked(new[] { new ContentCacheRefresher.JsonPayload(0, null, TreeChangeTypes.RefreshAll) }, out var draftChanged, out var publishedChanged); - NotifyLocked(new[] { new MediaCacheRefresher.JsonPayload(0, null, TreeChangeTypes.RefreshAll) }, out var anythingChanged); - } + + // These can be run side by side in parallel + + Parallel.Invoke( + () => + { + using (_contentStore.GetScopedWriteLock(_scopeProvider)) + { + NotifyLocked(new[] { new ContentCacheRefresher.JsonPayload(0, null, TreeChangeTypes.RefreshAll) }, out _, out _); + } + }, + () => + { + using (_mediaStore.GetScopedWriteLock(_scopeProvider)) + { + NotifyLocked(new[] { new MediaCacheRefresher.JsonPayload(0, null, TreeChangeTypes.RefreshAll) }, out _); + } + }); } ((PublishedSnapshot)CurrentPublishedSnapshot)?.Resync(); From 49da58b23c064b6cdeb31e4d5c858e09da351095 Mon Sep 17 00:00:00 2001 From: Shannon Date: Fri, 17 Apr 2020 14:56:49 +1000 Subject: [PATCH 3/3] adds notes --- .../NuCache/PublishedSnapshotService.cs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs index 4e630cad3d..6866878484 100755 --- a/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/PublishedSnapshotService.cs @@ -868,7 +868,20 @@ namespace Umbraco.Web.PublishedCache.NuCache //Since the models in the cache are based on these actual classes, all of the objects in the cache need to be updated //to use the newest version of the class. - // These can be run side by side in parallel + // NOTE: Ideally this can be run on background threads here which would prevent blocking the UI + // as is the case when saving a content type. Intially one would think that it won't be any different + // between running this here or in another background thread immediately after with regards to how the + // UI will respond because we already know between calling `WithSafeLiveFactoryReset` to reset the PureLive models + // and this code here, that many front-end requests could be attempted to be processed. If that is the case, those pages are going to get a + // model binding error and our ModelBindingExceptionFilter is going to to its magic to reload those pages so the end user is none the wiser. + // So whether or not this executes 'here' or on a background thread immediately wouldn't seem to make any difference except that we can return + // execution to the UI sooner. + // BUT!... there is a difference IIRC. There is still execution logic that continues after this call on this thread with the cache refreshers + // and those cache refreshers need to have the up-to-date data since other user cache refreshers will be expecting the data to be 'live'. If + // we ran this on a background thread then those cache refreshers are going to not get 'live' data when they query the content cache which + // they require. + + // These can be run side by side in parallel. Parallel.Invoke( () =>