From 4b467bf4702b2662cccf478cdb61b60eea17d70b Mon Sep 17 00:00:00 2001 From: Shannon Date: Tue, 7 Apr 2020 01:02:08 +1000 Subject: [PATCH] Creates data integrity health checks --- .../Repositories/IContentRepository.cs | 7 + .../Implement/ContentRepositoryBase.cs | 165 +++++++++++++++++- src/Umbraco.Core/Services/IContentService.cs | 2 +- .../Services/IContentServiceBase.cs | 13 +- .../Services/Implement/ContentService.cs | 20 +++ .../Services/Implement/MediaService.cs | 24 +++ .../Checks/Data/DatabaseIntegrityCheck.cs | 97 ++++++++++ src/Umbraco.Web/Umbraco.Web.csproj | 1 + 8 files changed, 326 insertions(+), 3 deletions(-) create mode 100644 src/Umbraco.Web/HealthCheck/Checks/Data/DatabaseIntegrityCheck.cs diff --git a/src/Umbraco.Core/Persistence/Repositories/IContentRepository.cs b/src/Umbraco.Core/Persistence/Repositories/IContentRepository.cs index 217719e144..aff7a58652 100644 --- a/src/Umbraco.Core/Persistence/Repositories/IContentRepository.cs +++ b/src/Umbraco.Core/Persistence/Repositories/IContentRepository.cs @@ -77,5 +77,12 @@ namespace Umbraco.Core.Persistence.Repositories /// Here, can be null but cannot. IEnumerable GetPage(IQuery query, long pageIndex, int pageSize, out long totalRecords, IQuery filter, Ordering ordering); + + /// + /// Checks the data integrity of the node paths stored in the database + /// + bool VerifyNodePaths(out int[] invalidIds); + + void FixNodePaths(); } } diff --git a/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs b/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs index 13b687eb4e..cd79923323 100644 --- a/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs +++ b/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs @@ -403,7 +403,7 @@ namespace Umbraco.Core.Persistence.Repositories.Implement } // content type alias is invariant - if(ordering.OrderBy.InvariantEquals("contentTypeAlias")) + if (ordering.OrderBy.InvariantEquals("contentTypeAlias")) { var joins = Sql() .InnerJoin("ctype").On((content, contentType) => content.ContentTypeId == contentType.NodeId, aliasRight: "ctype"); @@ -477,6 +477,169 @@ namespace Umbraco.Core.Persistence.Repositories.Implement IQuery filter, Ordering ordering); + public bool VerifyNodePaths(out int[] invalidIds) + { + var invalid = new List(); + + var sql = SqlContext.Sql() + .Select() + .From() + .Where(x => x.NodeObjectType == NodeObjectTypeId) + .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); + + // TODO: Could verify sort orders here too + + var currentParentIds = new HashSet { -1 }; + var prevParentIds = currentParentIds; + var lastLevel = -1; + + // use a forward cursor (query) + foreach (var node in Database.Query(sql)) + { + if (node.Level != lastLevel) + { + // changing levels + prevParentIds = currentParentIds; + currentParentIds = null; + lastLevel = node.Level; + } + + if (currentParentIds == null) + { + // we're reset + currentParentIds = new HashSet(); + } + + currentParentIds.Add(node.NodeId); + + var pathParts = node.Path.Split(','); + + if (!prevParentIds.Contains(node.ParentId)) + { + // invalid, this will be because the level is wrong + invalid.Add(node.NodeId); + } + else if (pathParts.Length < 2) + { + // invalid path + invalid.Add(node.NodeId); + } + else if (pathParts.Length - 1 != node.Level) + { + // invalid, either path or level is wrong + invalid.Add(node.NodeId); + } + else if (pathParts[pathParts.Length - 1] != node.NodeId.ToString()) + { + // invalid path + invalid.Add(node.NodeId); + } + else if (pathParts[pathParts.Length - 2] != node.ParentId.ToString()) + { + // invalid path + invalid.Add(node.NodeId); + } + } + + invalidIds = invalid.ToArray(); + return invalid.Count == 0; + } + + public void FixNodePaths() + { + // TODO: We can probably combine this logic with the above + + var invalid = new List<(int child, int parent)>(); + + var sql = SqlContext.Sql() + .Select() + .From() + .Where(x => x.NodeObjectType == NodeObjectTypeId) + .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); + + // TODO: Could verify sort orders here too + + var updated = new List(); + var missingParentIds = new Dictionary>(); + var currentParentIds = new HashSet { -1 }; + var prevParentIds = currentParentIds; + var lastLevel = -1; + + // use a forward cursor (query) + foreach (var node in Database.Query(sql)) + { + if (node.Level != lastLevel) + { + // changing levels + prevParentIds = currentParentIds; + currentParentIds = null; + lastLevel = node.Level; + } + + if (currentParentIds == null) + { + // we're reset + currentParentIds = new HashSet(); + } + + currentParentIds.Add(node.NodeId); + + var pathParts = node.Path.Split(','); + + if (!prevParentIds.Contains(node.ParentId)) + { + // invalid, this will be because the level is wrong (which prob means path is wrong too) + invalid.Add((node.NodeId, node.ParentId)); + if (missingParentIds.TryGetValue(node.ParentId, out var childIds)) + childIds.Add(node); + else + missingParentIds[node.ParentId] = new List {node}; + } + else if (pathParts.Length < 2) + { + // invalid path + invalid.Add((node.NodeId, node.ParentId)); + } + else if (pathParts.Length - 1 != node.Level) + { + // invalid, either path or level is wrong + invalid.Add((node.NodeId, node.ParentId)); + } + else if (pathParts[pathParts.Length - 1] != node.NodeId.ToString()) + { + // invalid path + invalid.Add((node.NodeId, node.ParentId)); + } + else if (pathParts[pathParts.Length - 2] != node.ParentId.ToString()) + { + // invalid path + invalid.Add((node.NodeId, node.ParentId)); + } + else + { + // it's valid + + if (missingParentIds.TryGetValue(node.NodeId, out var invalidNodes)) + { + // this parent has been flagged as missing which means one or more of it's children was ordered + // wrong and was checked first. So now we can try to rebuild the invalid paths. + + foreach (var invalidNode in invalidNodes) + { + invalidNode.Level = (short) (node.Level + 1); + invalidNode.Path = node.Path + "," + invalidNode.NodeId; + updated.Add(invalidNode); + } + } + } + } + + foreach (var node in updated) + { + Database.Update(node); + } + } + // here, filter can be null and ordering cannot protected IEnumerable GetPage(IQuery query, long pageIndex, int pageSize, out long totalRecords, diff --git a/src/Umbraco.Core/Services/IContentService.cs b/src/Umbraco.Core/Services/IContentService.cs index 6f9ca58821..58279fb4da 100644 --- a/src/Umbraco.Core/Services/IContentService.cs +++ b/src/Umbraco.Core/Services/IContentService.cs @@ -526,6 +526,6 @@ namespace Umbraco.Core.Services OperationResult Rollback(int id, int versionId, string culture = "*", int userId = Constants.Security.SuperUserId); #endregion - + } } diff --git a/src/Umbraco.Core/Services/IContentServiceBase.cs b/src/Umbraco.Core/Services/IContentServiceBase.cs index 439c55d0d0..1c04e0b4a3 100644 --- a/src/Umbraco.Core/Services/IContentServiceBase.cs +++ b/src/Umbraco.Core/Services/IContentServiceBase.cs @@ -5,5 +5,16 @@ /// TODO: Start sharing the logic! /// public interface IContentServiceBase : IService - { } + { + + /// + /// Checks the data integrity of the node paths/levels stored in the database + /// + bool VerifyNodePaths(out int[] invalidIds); + + /// + /// Fixes the data integrity of node paths/levels stored in the database + /// + void FixNodePaths(); + } } diff --git a/src/Umbraco.Core/Services/Implement/ContentService.cs b/src/Umbraco.Core/Services/Implement/ContentService.cs index 1558b0170b..5d010d321f 100644 --- a/src/Umbraco.Core/Services/Implement/ContentService.cs +++ b/src/Umbraco.Core/Services/Implement/ContentService.cs @@ -2375,6 +2375,26 @@ namespace Umbraco.Core.Services.Implement return OperationResult.Succeed(evtMsgs); } + public bool VerifyNodePaths(out int[] invalidIds) + { + using (var scope = ScopeProvider.CreateScope(autoComplete: true)) + { + scope.ReadLock(Constants.Locks.ContentTree); + return _documentRepository.VerifyNodePaths(out invalidIds); + } + } + + public void FixNodePaths() + { + using (var scope = ScopeProvider.CreateScope(autoComplete: true)) + { + scope.WriteLock(Constants.Locks.ContentTree); + _documentRepository.FixNodePaths(); + + // TODO: We're going to have to clear all caches + } + } + #endregion #region Internal Methods diff --git a/src/Umbraco.Core/Services/Implement/MediaService.cs b/src/Umbraco.Core/Services/Implement/MediaService.cs index 528d0a0bf9..94f57bd859 100644 --- a/src/Umbraco.Core/Services/Implement/MediaService.cs +++ b/src/Umbraco.Core/Services/Implement/MediaService.cs @@ -1139,6 +1139,28 @@ namespace Umbraco.Core.Services.Implement } return true; + + } + + + public bool VerifyNodePaths(out int[] invalidIds) + { + using (var scope = ScopeProvider.CreateScope(autoComplete: true)) + { + scope.ReadLock(Constants.Locks.MediaTree); + return _mediaRepository.VerifyNodePaths(out invalidIds); + } + } + + public void FixNodePaths() + { + using (var scope = ScopeProvider.CreateScope(autoComplete: true)) + { + scope.WriteLock(Constants.Locks.MediaTree); + _mediaRepository.FixNodePaths(); + + // TODO: We're going to have to clear all caches + } } #endregion @@ -1358,5 +1380,7 @@ namespace Umbraco.Core.Services.Implement } #endregion + + } } diff --git a/src/Umbraco.Web/HealthCheck/Checks/Data/DatabaseIntegrityCheck.cs b/src/Umbraco.Web/HealthCheck/Checks/Data/DatabaseIntegrityCheck.cs new file mode 100644 index 0000000000..d7bf62067f --- /dev/null +++ b/src/Umbraco.Web/HealthCheck/Checks/Data/DatabaseIntegrityCheck.cs @@ -0,0 +1,97 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Umbraco.Core.Services; + +namespace Umbraco.Web.HealthCheck.Checks.Data +{ + [HealthCheck( + "73DD0C1C-E0CA-4C31-9564-1DCA509788AF", + "Database integrity check", + Description = "Checks for various data integrity issues in the Umbraco database.", + Group = "Data Integrity")] + public class DatabaseIntegrityCheck : HealthCheck + { + private readonly IContentService _contentService; + private readonly IMediaService _mediaService; + private const string _fixMediaPaths = "fixMediaPaths"; + private const string _fixContentPaths = "fixContentPaths"; + + public DatabaseIntegrityCheck(IContentService contentService, IMediaService mediaService) + { + _contentService = contentService; + _mediaService = mediaService; + } + + /// + /// Get the status for this health check + /// + /// + public override IEnumerable GetStatus() + { + //return the statuses + return new[] + { + CheckContent(), + CheckMedia() + }; + } + + private HealthCheckStatus CheckMedia() + { + return CheckPaths(_fixMediaPaths, "Fix media paths", "media", () => + { + var mediaPaths = _mediaService.VerifyNodePaths(out var invalidMediaPaths); + return (mediaPaths, invalidMediaPaths); + }); + } + + private HealthCheckStatus CheckContent() + { + return CheckPaths(_fixContentPaths, "Fix content paths", "content", () => + { + var contentPaths = _contentService.VerifyNodePaths(out var invalidContentPaths); + return (contentPaths, invalidContentPaths); + }); + } + + private HealthCheckStatus CheckPaths(string actionAlias, string actionName, string entityType, Func<(bool success, int[] invalidPaths)> doCheck) + { + var result = doCheck(); + + var actions = new List(); + if (!result.success) + { + actions.Add(new HealthCheckAction(actionAlias, Id) + { + Name = actionName + }); + } + + return new HealthCheckStatus(result.success + ? $"All {entityType} paths are valid" + : $"There are {result.invalidPaths.Length} invalid {entityType} paths") + { + ResultType = result.success ? StatusResultType.Success : StatusResultType.Error, + Actions = actions + }; + } + + public override HealthCheckStatus ExecuteAction(HealthCheckAction action) + { + switch (action.Alias) + { + case _fixContentPaths: + _contentService.FixNodePaths(); + return CheckContent(); + case _fixMediaPaths: + _mediaService.FixNodePaths(); + return CheckMedia(); + default: + throw new InvalidOperationException("Action not supported"); + } + } + } +} diff --git a/src/Umbraco.Web/Umbraco.Web.csproj b/src/Umbraco.Web/Umbraco.Web.csproj index c3024f63ae..e39687bed8 100755 --- a/src/Umbraco.Web/Umbraco.Web.csproj +++ b/src/Umbraco.Web/Umbraco.Web.csproj @@ -156,6 +156,7 @@ +