using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using NPoco; using Umbraco.Core; using Umbraco.Core.Logging; using Umbraco.Core.Persistence; using Umbraco.Core.Persistence.Dtos; using Umbraco.Core.Scoping; using Umbraco.Web.Composing; using static Umbraco.Core.Persistence.NPocoSqlExtensions.Statics; namespace Umbraco.Web.PublishedCache.NuCache.DataSource { // TODO: use SqlTemplate for these queries else it's going to be horribly slow! // provides efficient database access for NuCache internal class DatabaseDataSource : IDataSource { private const int PageSize = 500; private readonly IContentCacheDataSerializerFactory _contentCacheDataSerializerFactory; public DatabaseDataSource(IContentCacheDataSerializerFactory contentCacheDataSerializerFactory) { _contentCacheDataSerializerFactory = contentCacheDataSerializerFactory; } // we want arrays, we want them all loaded, not an enumerable private Sql ContentSourcesSelect(IScope scope, Func, Sql> joins = null) { var sqlTemplate = scope.SqlContext.Templates.Get(Constants.SqlTemplates.NuCacheDatabaseDataSource.ContentSourcesSelect1, tsql => tsql.Select(x => Alias(x.NodeId, "Id"), x => Alias(x.UniqueId, "Uid"), x => Alias(x.Level, "Level"), x => Alias(x.Path, "Path"), x => Alias(x.SortOrder, "SortOrder"), x => Alias(x.ParentId, "ParentId"), x => Alias(x.CreateDate, "CreateDate"), x => Alias(x.UserId, "CreatorId")) .AndSelect(x => Alias(x.ContentTypeId, "ContentTypeId")) .AndSelect(x => Alias(x.Published, "Published"), x => Alias(x.Edited, "Edited")) .AndSelect(x => Alias(x.Id, "VersionId"), x => Alias(x.Text, "EditName"), x => Alias(x.VersionDate, "EditVersionDate"), x => Alias(x.UserId, "EditWriterId")) .AndSelect(x => Alias(x.TemplateId, "EditTemplateId")) .AndSelect("pcver", x => Alias(x.Id, "PublishedVersionId"), x => Alias(x.Text, "PubName"), x => Alias(x.VersionDate, "PubVersionDate"), x => Alias(x.UserId, "PubWriterId")) .AndSelect("pdver", x => Alias(x.TemplateId, "PubTemplateId")) .AndSelect("nuEdit", x => Alias(x.Data, "EditData")) .AndSelect("nuPub", x => Alias(x.Data, "PubData")) .AndSelect("nuEdit", x => Alias(x.RawData, "EditDataRaw")) .AndSelect("nuPub", x => Alias(x.RawData, "PubDataRaw")) .From()); var sql = sqlTemplate.Sql(); // TODO: I'm unsure how we can format the below into SQL templates also because right.Current and right.Published end up being parameters if (joins != null) sql = joins(sql); sql = sql .InnerJoin().On((left, right) => left.NodeId == right.NodeId) .InnerJoin().On((left, right) => left.NodeId == right.NodeId) .InnerJoin().On((left, right) => left.NodeId == right.NodeId && right.Current) .InnerJoin().On((left, right) => left.Id == right.Id) .LeftJoin(j => j.InnerJoin("pdver").On((left, right) => left.Id == right.Id && right.Published == true, "pcver", "pdver"), "pcver") .On((left, right) => left.NodeId == right.NodeId, aliasRight: "pcver") .LeftJoin("nuEdit").On((left, right) => left.NodeId == right.NodeId && right.Published == false, aliasRight: "nuEdit") .LeftJoin("nuPub").On((left, right) => left.NodeId == right.NodeId && right.Published == true, aliasRight: "nuPub"); return sql; } /// /// Returns a slightly more optimized query to use for the document counting when paging over the content sources /// /// /// private Sql ContentSourcesCount(IScope scope, Func, Sql> joins = null) { var sqlTemplate = scope.SqlContext.Templates.Get(Constants.SqlTemplates.NuCacheDatabaseDataSource.ContentSourcesCount, tsql => tsql.Select(x => Alias(x.NodeId, "Id")) .From() .InnerJoin().On((left, right) => left.NodeId == right.NodeId) .InnerJoin().On((left, right) => left.NodeId == right.NodeId)); var sql = sqlTemplate.Sql(); if (joins != null) sql = joins(sql); // TODO: We can't use a template with this one because of the 'right.Current' and 'right.Published' ends up being a parameter so not sure how we can do that sql = sql .InnerJoin().On((left, right) => left.NodeId == right.NodeId && right.Current) .InnerJoin().On((left, right) => left.Id == right.Id) .LeftJoin(j => j.InnerJoin("pdver").On((left, right) => left.Id == right.Id && right.Published, "pcver", "pdver"), "pcver") .On((left, right) => left.NodeId == right.NodeId, aliasRight: "pcver"); return sql; } public ContentNodeKit GetContentSource(IScope scope, int id) { var sql = ContentSourcesSelect(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && x.NodeId == id && !x.Trashed) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); var dto = scope.Database.Fetch(sql).FirstOrDefault(); if (dto == null) return ContentNodeKit.Empty; var serializer = _contentCacheDataSerializerFactory.Create(ContentCacheDataSerializerEntityType.Document); return CreateContentNodeKit(dto, serializer); } public IEnumerable GetAllContentSources(IScope scope) { // Create a different query for the SQL vs the COUNT Sql since the auto-generated COUNT Sql will be inneficient var sql = ContentSourcesSelect(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); // create a more efficient COUNT query without the join on the cmsContentNu table var sqlCountQuery = ContentSourcesCount(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed); var sqlCount = scope.SqlContext.Sql("SELECT COUNT(*) FROM (").Append(sqlCountQuery).Append(") npoco_tbl"); var serializer = _contentCacheDataSerializerFactory.Create(ContentCacheDataSerializerEntityType.Document); // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. foreach (var row in scope.Database.QueryPaged(PageSize, sql, sqlCount)) { yield return CreateContentNodeKit(row, serializer); } } public IEnumerable GetBranchContentSources(IScope scope, int id) { var syntax = scope.SqlContext.SqlSyntax; var sql = ContentSourcesSelect(scope, s => s.InnerJoin("x").On((left, right) => left.NodeId == right.NodeId || SqlText(left.Path, right.Path, (lp, rp) => $"({lp} LIKE {syntax.GetConcat(rp, "',%'")})"), aliasRight: "x")) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) .Where(x => x.NodeId == id, "x") .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); // create a more efficient COUNT query without the join on the cmsContentNu table var sqlCountQuery = ContentSourcesCount(scope, s => s.InnerJoin("x").On((left, right) => left.NodeId == right.NodeId || SqlText(left.Path, right.Path, (lp, rp) => $"({lp} LIKE {syntax.GetConcat(rp, "',%'")})"), aliasRight: "x")) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) .Where(x => x.NodeId == id, "x"); var sqlCount = scope.SqlContext.Sql("SELECT COUNT(*) FROM (").Append(sqlCountQuery).Append(") npoco_tbl"); var serializer = _contentCacheDataSerializerFactory.Create(ContentCacheDataSerializerEntityType.Document); // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. foreach (var row in scope.Database.QueryPaged(PageSize, sql, sqlCount)) { yield return CreateContentNodeKit(row, serializer); } } public IEnumerable GetTypeContentSources(IScope scope, IEnumerable ids) { if (!ids.Any()) yield break; var sql = ContentSourcesSelect(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) .WhereIn(x => x.ContentTypeId, ids) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); var sqlCountQuery = ContentSourcesCount(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Document && !x.Trashed) .WhereIn(x => x.ContentTypeId, ids); var sqlCount = scope.SqlContext.Sql("SELECT COUNT(*) FROM (").Append(sqlCountQuery).Append(") npoco_tbl"); var serializer = _contentCacheDataSerializerFactory.Create(ContentCacheDataSerializerEntityType.Document); // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. foreach (var row in scope.Database.QueryPaged(PageSize, sql, sqlCount)) { yield return CreateContentNodeKit(row, serializer); } } private Sql MediaSourcesSelect(IScope scope, Func, Sql> joins = null) { var sql = scope.SqlContext.Sql() .Select(x => Alias(x.NodeId, "Id")) .AndSelect(x => Alias(x.ContentTypeId, "ContentTypeId")) .AndSelect(x => Alias(x.Id, "VersionId"), x => Alias(x.Text, "EditName"), x => Alias(x.VersionDate, "EditVersionDate"), x => Alias(x.UserId, "EditWriterId")) .AndSelect("nuEdit", x => Alias(x.Data, "EditData")) .AndSelect("nuEdit", x => Alias(x.RawData, "EditDataRaw")) .From(); if (joins != null) sql = joins(sql); sql = sql .InnerJoin().On((left, right) => left.NodeId == right.NodeId) .InnerJoin().On((left, right) => left.NodeId == right.NodeId && right.Current) .LeftJoin("nuEdit").On((left, right) => left.NodeId == right.NodeId && !right.Published, aliasRight: "nuEdit"); return sql; } private Sql MediaSourcesCount(IScope scope, Func, Sql> joins = null) { var sql = scope.SqlContext.Sql() .Select(x => Alias(x.NodeId, "Id")) .From(); if (joins != null) sql = joins(sql); sql = sql .InnerJoin().On((left, right) => left.NodeId == right.NodeId) .InnerJoin().On((left, right) => left.NodeId == right.NodeId && right.Current); return sql; } public ContentNodeKit GetMediaSource(IScope scope, int id) { var sql = MediaSourcesSelect(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && x.NodeId == id && !x.Trashed) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); var dto = scope.Database.Fetch(sql).FirstOrDefault(); if (dto == null) return ContentNodeKit.Empty; var serializer = _contentCacheDataSerializerFactory.Create(ContentCacheDataSerializerEntityType.Media); return CreateMediaNodeKit(dto, serializer); } public IEnumerable GetAllMediaSources(IScope scope) { var sql = MediaSourcesSelect(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && !x.Trashed) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); var sqlCountQuery = MediaSourcesCount(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && !x.Trashed); var sqlCount = scope.SqlContext.Sql("SELECT COUNT(*) FROM (").Append(sqlCountQuery).Append(") npoco_tbl"); var serializer = _contentCacheDataSerializerFactory.Create(ContentCacheDataSerializerEntityType.Media); // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. foreach (var row in scope.Database.QueryPaged(PageSize, sql, sqlCount)) { yield return CreateMediaNodeKit(row, serializer); } } public IEnumerable GetBranchMediaSources(IScope scope, int id) { var syntax = scope.SqlContext.SqlSyntax; var sql = MediaSourcesSelect(scope, s => s.InnerJoin("x").On((left, right) => left.NodeId == right.NodeId || SqlText(left.Path, right.Path, (lp, rp) => $"({lp} LIKE {syntax.GetConcat(rp, "',%'")})"), aliasRight: "x")) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && !x.Trashed) .Where(x => x.NodeId == id, "x") .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); var sqlCountQuery = MediaSourcesCount(scope, s => s.InnerJoin("x").On((left, right) => left.NodeId == right.NodeId || SqlText(left.Path, right.Path, (lp, rp) => $"({lp} LIKE {syntax.GetConcat(rp, "',%'")})"), aliasRight: "x")) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && !x.Trashed) .Where(x => x.NodeId == id, "x"); var sqlCount = scope.SqlContext.Sql("SELECT COUNT(*) FROM (").Append(sqlCountQuery).Append(") npoco_tbl"); var serializer = _contentCacheDataSerializerFactory.Create(ContentCacheDataSerializerEntityType.Media); // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. foreach (var row in scope.Database.QueryPaged(PageSize, sql, sqlCount)) { yield return CreateMediaNodeKit(row, serializer); } } public IEnumerable GetTypeMediaSources(IScope scope, IEnumerable ids) { if (!ids.Any()) yield break; var sql = MediaSourcesSelect(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && !x.Trashed) .WhereIn(x => x.ContentTypeId, ids) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); var sqlCountQuery = MediaSourcesCount(scope) .Where(x => x.NodeObjectType == Constants.ObjectTypes.Media && !x.Trashed) .WhereIn(x => x.ContentTypeId, ids); var sqlCount = scope.SqlContext.Sql("SELECT COUNT(*) FROM (").Append(sqlCountQuery).Append(") npoco_tbl"); var serializer = _contentCacheDataSerializerFactory.Create(ContentCacheDataSerializerEntityType.Media); // We need to page here. We don't want to iterate over every single row in one connection cuz this can cause an SQL Timeout. // We also want to read with a db reader and not load everything into memory, QueryPaged lets us do that. foreach (var row in scope.Database.QueryPaged(PageSize, sql, sqlCount)) { yield return CreateMediaNodeKit(row, serializer); } } private ContentNodeKit CreateContentNodeKit(ContentSourceDto dto, IContentCacheDataSerializer serializer) { ContentData d = null; ContentData p = null; if (dto.Edited) { if (dto.EditData == null && dto.EditDataRaw == null) { if (Debugger.IsAttached) throw new InvalidOperationException("Missing cmsContentNu edited content for node " + dto.Id + ", consider rebuilding."); Current.Logger.Warn("Missing cmsContentNu edited content for node {NodeId}, consider rebuilding.", dto.Id); } else { var deserializedContent = serializer.Deserialize(dto.ContentTypeId, dto.EditData, dto.EditDataRaw); d = new ContentData { Name = dto.EditName, Published = false, TemplateId = dto.EditTemplateId, VersionId = dto.VersionId, VersionDate = dto.EditVersionDate, WriterId = dto.EditWriterId, Properties = deserializedContent.PropertyData, // TODO: We don't want to allocate empty arrays CultureInfos = deserializedContent.CultureData, UrlSegment = deserializedContent.UrlSegment }; } } if (dto.Published) { if (dto.PubData == null && dto.PubDataRaw == null) { if (Debugger.IsAttached) throw new InvalidOperationException("Missing cmsContentNu published content for node " + dto.Id + ", consider rebuilding."); Current.Logger.Warn("Missing cmsContentNu published content for node {NodeId}, consider rebuilding.", dto.Id); } else { var deserializedContent = serializer.Deserialize(dto.ContentTypeId, dto.PubData, dto.PubDataRaw); p = new ContentData { Name = dto.PubName, UrlSegment = deserializedContent.UrlSegment, Published = true, TemplateId = dto.PubTemplateId, VersionId = dto.VersionId, VersionDate = dto.PubVersionDate, WriterId = dto.PubWriterId, Properties = deserializedContent.PropertyData, // TODO: We don't want to allocate empty arrays CultureInfos = deserializedContent.CultureData }; } } var n = new ContentNode(dto.Id, dto.Uid, dto.Level, dto.Path, dto.SortOrder, dto.ParentId, dto.CreateDate, dto.CreatorId); var s = new ContentNodeKit { Node = n, ContentTypeId = dto.ContentTypeId, DraftData = d, PublishedData = p }; return s; } private ContentNodeKit CreateMediaNodeKit(ContentSourceDto dto, IContentCacheDataSerializer serializer) { if (dto.EditData == null && dto.EditDataRaw == null) throw new InvalidOperationException("No data for media " + dto.Id); var deserializedMedia = serializer.Deserialize(dto.ContentTypeId, dto.EditData, dto.EditDataRaw); var p = new ContentData { Name = dto.EditName, Published = true, TemplateId = -1, VersionId = dto.VersionId, VersionDate = dto.EditVersionDate, WriterId = dto.CreatorId, // what-else? Properties = deserializedMedia.PropertyData, // TODO: We don't want to allocate empty arrays CultureInfos = deserializedMedia.CultureData }; var n = new ContentNode(dto.Id, dto.Uid, dto.Level, dto.Path, dto.SortOrder, dto.ParentId, dto.CreateDate, dto.CreatorId); var s = new ContentNodeKit { Node = n, ContentTypeId = dto.ContentTypeId, PublishedData = p }; return s; } } }