From 5d8bfcea979d1582af1dc3f7ed973107b16368e5 Mon Sep 17 00:00:00 2001 From: Shannon Date: Fri, 17 Apr 2020 00:47:26 +1000 Subject: [PATCH] Ensure when we are loading in ALL data that we page over the data as to not cause an SQL timeout --- .../Persistence/NPocoDatabaseExtensions.cs | 42 +++++++++++++ .../NuCache/DataSource/DatabaseDataSource.cs | 63 ++++++++++++------- 2 files changed, 82 insertions(+), 23 deletions(-) diff --git a/src/Umbraco.Core/Persistence/NPocoDatabaseExtensions.cs b/src/Umbraco.Core/Persistence/NPocoDatabaseExtensions.cs index acfa51f895..152dcbe6d3 100644 --- a/src/Umbraco.Core/Persistence/NPocoDatabaseExtensions.cs +++ b/src/Umbraco.Core/Persistence/NPocoDatabaseExtensions.cs @@ -18,6 +18,48 @@ namespace Umbraco.Core.Persistence /// public static partial class NPocoDatabaseExtensions { + /// + /// Iterates over the result of a paged data set with a db reader + /// + /// + /// + /// + /// The number of rows to load per page + /// + /// + /// + /// + /// NPoco's normal Page returns a List{T} but sometimes we don't want all that in memory and instead want to + /// iterate over each row with a reader using Query vs Fetch. + /// + internal static IEnumerable QueryPaged(this IDatabase database, long pageSize, Sql sql) + { + var sqlString = sql.SQL; + var sqlArgs = sql.Arguments; + + int? itemCount = null; + long pageIndex = 0; + do + { + // Get the paged queries + database.BuildPageQueries(pageIndex * pageSize, pageSize, sqlString, ref sqlArgs, out var sqlCount, out var sqlPage); + + // get the item count once + if (itemCount == null) + { + itemCount = database.ExecuteScalar(sqlCount, sqlArgs); + } + pageIndex++; + + // iterate over rows without allocating all items to memory (Query vs Fetch) + foreach (var row in database.Query(sqlPage, sqlArgs)) + { + yield return row; + } + + } while ((pageIndex * pageSize) < itemCount); + } + // NOTE // // proper way to do it with TSQL and SQLCE diff --git a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/DatabaseDataSource.cs b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/DatabaseDataSource.cs index 19aab7ea65..694dac04df 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/DatabaseDataSource.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/DatabaseDataSource.cs @@ -20,6 +20,8 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource // provides efficient database access for NuCache internal class DatabaseDataSource : IDataSource { + private const int PageSize = 500; + // we want arrays, we want them all loaded, not an enumerable private Sql ContentSourcesSelect(IScope scope, Func, Sql> joins = null) @@ -79,33 +81,43 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - return scope.Database.Query(sql).Select(CreateContentNodeKit); + // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. + // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. + + foreach (var row in scope.Database.QueryPaged(PageSize, sql)) + yield return CreateContentNodeKit(row); } public IEnumerable GetBranchContentSources(IScope scope, int id) { var syntax = scope.SqlContext.SqlSyntax; - var sql = ContentSourcesSelect(scope, s => s + var sql = ContentSourcesSelect(scope, + s => s.InnerJoin("x").On((left, right) => left.NodeId == right.NodeId || SqlText(left.Path, right.Path, (lp, rp) => $"({lp} LIKE {syntax.GetConcat(rp, "',%'")})"), aliasRight: "x")) + .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) + .Where(x => x.NodeId == id, "x") + .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - .InnerJoin("x").On((left, right) => left.NodeId == right.NodeId || SqlText(left.Path, right.Path, (lp, rp) => $"({lp} LIKE {syntax.GetConcat(rp, "',%'")})"), aliasRight: "x")) + // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. + // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. - .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) - .Where(x => x.NodeId == id, "x") - .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - - return scope.Database.Query(sql).Select(CreateContentNodeKit); + foreach (var row in scope.Database.QueryPaged(PageSize, sql)) + yield return CreateContentNodeKit(row); } public IEnumerable GetTypeContentSources(IScope scope, IEnumerable ids) { - if (!ids.Any()) return Enumerable.Empty(); + if (!ids.Any()) yield break; var sql = ContentSourcesSelect(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) .WhereIn(x => x.ContentTypeId, ids) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - return scope.Database.Query(sql).Select(CreateContentNodeKit); + // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. + // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. + + foreach (var row in scope.Database.QueryPaged(PageSize, sql)) + yield return CreateContentNodeKit(row); } private Sql MediaSourcesSelect(IScope scope, Func, Sql> joins = null) @@ -116,11 +128,8 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource x => Alias(x.Level, "Level"), x => Alias(x.Path, "Path"), x => Alias(x.SortOrder, "SortOrder"), x => Alias(x.ParentId, "ParentId"), x => Alias(x.CreateDate, "CreateDate"), x => Alias(x.UserId, "CreatorId")) .AndSelect(x => Alias(x.ContentTypeId, "ContentTypeId")) - .AndSelect(x => Alias(x.Id, "VersionId"), x => Alias(x.Text, "EditName"), x => Alias(x.VersionDate, "EditVersionDate"), x => Alias(x.UserId, "EditWriterId")) - .AndSelect("nuEdit", x => Alias(x.Data, "EditData")) - .From(); if (joins != null) @@ -128,9 +137,7 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource sql = sql .InnerJoin().On((left, right) => left.NodeId == right.NodeId) - .InnerJoin().On((left, right) => left.NodeId == right.NodeId && right.Current) - .LeftJoin("nuEdit").On((left, right) => left.NodeId == right.NodeId && !right.Published, aliasRight: "nuEdit"); return sql; @@ -152,33 +159,43 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && !x.Trashed) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - return scope.Database.Query(sql).Select(CreateMediaNodeKit); + // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. + // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. + + foreach (var row in scope.Database.QueryPaged(PageSize, sql)) + yield return CreateMediaNodeKit(row); } public IEnumerable GetBranchMediaSources(IScope scope, int id) { var syntax = scope.SqlContext.SqlSyntax; - var sql = MediaSourcesSelect(scope, s => s - - .InnerJoin("x").On((left, right) => left.NodeId == right.NodeId || SqlText(left.Path, right.Path, (lp, rp) => $"({lp} LIKE {syntax.GetConcat(rp, "',%'")})"), aliasRight: "x")) - + var sql = MediaSourcesSelect(scope, + s => s.InnerJoin("x").On((left, right) => left.NodeId == right.NodeId || SqlText(left.Path, right.Path, (lp, rp) => $"({lp} LIKE {syntax.GetConcat(rp, "',%'")})"), aliasRight: "x")) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && !x.Trashed) .Where(x => x.NodeId == id, "x") .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - return scope.Database.Query(sql).Select(CreateMediaNodeKit); + // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. + // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. + + foreach (var row in scope.Database.QueryPaged(PageSize, sql)) + yield return CreateMediaNodeKit(row); } public IEnumerable GetTypeMediaSources(IScope scope, IEnumerable ids) { - if (!ids.Any()) return Enumerable.Empty(); + if (!ids.Any()) yield break; var sql = MediaSourcesSelect(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && !x.Trashed) .WhereIn(x => x.ContentTypeId, ids) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - return scope.Database.Query(sql).Select(CreateMediaNodeKit); + // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. + // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. + + foreach (var row in scope.Database.QueryPaged(PageSize, sql)) + yield return CreateMediaNodeKit(row); } private static ContentNodeKit CreateContentNodeKit(ContentSourceDto dto)