diff --git a/src/Umbraco.Core/ContentExtensions.cs b/src/Umbraco.Core/ContentExtensions.cs index 3ee2a75b09..7bce23e98e 100644 --- a/src/Umbraco.Core/ContentExtensions.cs +++ b/src/Umbraco.Core/ContentExtensions.cs @@ -66,7 +66,10 @@ namespace Umbraco.Core // When this occurs, only Path + Level + UpdateDate are being changed. In this case we can bypass a lot of the below // operations which will make this whole operation go much faster. When moving we don't need to create // new versions, etc... because we cannot roll this operation back anyways. - var isMoving = entity.GetDirtyProperties().All(x => x == nameof(entity.Path) || x == nameof(entity.Level) || x == nameof(entity.UpdateDate)); + var isMoving = entity.IsPropertyDirty(nameof(entity.Path)) + && entity.IsPropertyDirty(nameof(entity.Level)) + && entity.IsPropertyDirty(nameof(entity.UpdateDate)); + return isMoving; } diff --git a/src/Umbraco.Core/Models/ContentDataIntegrityReport.cs b/src/Umbraco.Core/Models/ContentDataIntegrityReport.cs new file mode 100644 index 0000000000..60683ee72f --- /dev/null +++ b/src/Umbraco.Core/Models/ContentDataIntegrityReport.cs @@ -0,0 +1,44 @@ +using System.Collections.Generic; + +namespace Umbraco.Core.Models +{ + public class ContentDataIntegrityReport + { + public ContentDataIntegrityReport(IReadOnlyDictionary detectedIssues) + { + DetectedIssues = detectedIssues; + } + + public bool Ok => DetectedIssues.Count == 0; + + public IReadOnlyDictionary DetectedIssues { get; } + + public enum IssueType + { + /// + /// The item's level and path are inconsistent with it's parent's path and level + /// + InvalidPathAndLevelByParentId, + + /// + /// The item's path doesn't contain all required parts + /// + InvalidPathEmpty, + + /// + /// The item's path parts are inconsistent with it's level value + /// + InvalidPathLevelMismatch, + + /// + /// The item's path does not end with it's own ID + /// + InvalidPathById, + + /// + /// The item's path does not have it's parent Id as the 2nd last entry + /// + InvalidPathByParentId, + } + } +} diff --git a/src/Umbraco.Core/Models/ContentDataIntegrityReportEntry.cs b/src/Umbraco.Core/Models/ContentDataIntegrityReportEntry.cs new file mode 100644 index 0000000000..517b9e80dc --- /dev/null +++ b/src/Umbraco.Core/Models/ContentDataIntegrityReportEntry.cs @@ -0,0 +1,13 @@ +namespace Umbraco.Core.Models +{ + public class ContentDataIntegrityReportEntry + { + public ContentDataIntegrityReportEntry(ContentDataIntegrityReport.IssueType issueType) + { + IssueType = issueType; + } + + public ContentDataIntegrityReport.IssueType IssueType { get; } + public bool Fixed { get; set; } + } +} diff --git a/src/Umbraco.Core/Models/ContentDataIntegrityReportOptions.cs b/src/Umbraco.Core/Models/ContentDataIntegrityReportOptions.cs new file mode 100644 index 0000000000..c4689467c1 --- /dev/null +++ b/src/Umbraco.Core/Models/ContentDataIntegrityReportOptions.cs @@ -0,0 +1,13 @@ +namespace Umbraco.Core.Models +{ + public class ContentDataIntegrityReportOptions + { + /// + /// Set to true to try to automatically resolve data integrity issues + /// + public bool FixIssues { get; set; } + + // TODO: We could define all sorts of options for the data integrity check like what to check for, what to fix, etc... + // things like Tag data consistency, etc... + } +} diff --git a/src/Umbraco.Core/Persistence/Repositories/IContentRepository.cs b/src/Umbraco.Core/Persistence/Repositories/IContentRepository.cs index aff7a58652..ad9e2d27c1 100644 --- a/src/Umbraco.Core/Persistence/Repositories/IContentRepository.cs +++ b/src/Umbraco.Core/Persistence/Repositories/IContentRepository.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using Umbraco.Core.Models; using Umbraco.Core.Models.Entities; using Umbraco.Core.Persistence.DatabaseModelDefinitions; using Umbraco.Core.Persistence.Querying; @@ -78,11 +79,6 @@ namespace Umbraco.Core.Persistence.Repositories IEnumerable GetPage(IQuery query, long pageIndex, int pageSize, out long totalRecords, IQuery filter, Ordering ordering); - /// - /// Checks the data integrity of the node paths stored in the database - /// - bool VerifyNodePaths(out int[] invalidIds); - - void FixNodePaths(); + ContentDataIntegrityReport CheckDataIntegrity(ContentDataIntegrityReportOptions options); } } diff --git a/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs b/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs index cf67244fbe..42d7e67bc0 100644 --- a/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs +++ b/src/Umbraco.Core/Persistence/Repositories/Implement/ContentRepositoryBase.cs @@ -477,9 +477,9 @@ namespace Umbraco.Core.Persistence.Repositories.Implement IQuery filter, Ordering ordering); - public bool VerifyNodePaths(out int[] invalidIds) + public ContentDataIntegrityReport CheckDataIntegrity(ContentDataIntegrityReportOptions options) { - var invalid = new List(); + var report = new Dictionary(); var sql = SqlContext.Sql() .Select() @@ -487,80 +487,8 @@ namespace Umbraco.Core.Persistence.Repositories.Implement .Where(x => x.NodeObjectType == NodeObjectTypeId) .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - // TODO: Could verify sort orders here too - - var currentParentIds = new HashSet { -1 }; - var prevParentIds = currentParentIds; - var lastLevel = -1; - - // use a forward cursor (query) - foreach (var node in Database.Query(sql)) - { - if (node.Level != lastLevel) - { - // changing levels - prevParentIds = currentParentIds; - currentParentIds = null; - lastLevel = node.Level; - } - - if (currentParentIds == null) - { - // we're reset - currentParentIds = new HashSet(); - } - - currentParentIds.Add(node.NodeId); - - var pathParts = node.Path.Split(','); - - if (!prevParentIds.Contains(node.ParentId)) - { - // invalid, this will be because the level is wrong - invalid.Add(node.NodeId); - } - else if (pathParts.Length < 2) - { - // invalid path - invalid.Add(node.NodeId); - } - else if (pathParts.Length - 1 != node.Level) - { - // invalid, either path or level is wrong - invalid.Add(node.NodeId); - } - else if (pathParts[pathParts.Length - 1] != node.NodeId.ToString()) - { - // invalid path - invalid.Add(node.NodeId); - } - else if (pathParts[pathParts.Length - 2] != node.ParentId.ToString()) - { - // invalid path - invalid.Add(node.NodeId); - } - } - - invalidIds = invalid.ToArray(); - return invalid.Count == 0; - } - - public void FixNodePaths() - { - // TODO: We can probably combine this logic with the above - - var invalid = new List<(int child, int parent)>(); - - var sql = SqlContext.Sql() - .Select() - .From() - .Where(x => x.NodeObjectType == NodeObjectTypeId) - .OrderBy(x => x.Level, x => x.ParentId, x => x.SortOrder); - - // TODO: Could verify sort orders here too - - var updated = new List(); - var missingParentIds = new Dictionary>(); + var nodesToRebuild = new Dictionary>(); + var validNodes = new Dictionary(); var currentParentIds = new HashSet { -1 }; var prevParentIds = currentParentIds; var lastLevel = -1; @@ -589,54 +517,77 @@ namespace Umbraco.Core.Persistence.Repositories.Implement if (!prevParentIds.Contains(node.ParentId)) { // invalid, this will be because the level is wrong (which prob means path is wrong too) - invalid.Add((node.NodeId, node.ParentId)); - if (missingParentIds.TryGetValue(node.ParentId, out var childIds)) - childIds.Add(node); - else - missingParentIds[node.ParentId] = new List {node}; + report.Add(node.NodeId, new ContentDataIntegrityReportEntry(ContentDataIntegrityReport.IssueType.InvalidPathAndLevelByParentId)); + AppendNodeToFix(nodesToRebuild, node); } else if (pathParts.Length < 2) { // invalid path - invalid.Add((node.NodeId, node.ParentId)); + report.Add(node.NodeId, new ContentDataIntegrityReportEntry(ContentDataIntegrityReport.IssueType.InvalidPathEmpty)); + AppendNodeToFix(nodesToRebuild, node); } else if (pathParts.Length - 1 != node.Level) { // invalid, either path or level is wrong - invalid.Add((node.NodeId, node.ParentId)); + report.Add(node.NodeId, new ContentDataIntegrityReportEntry(ContentDataIntegrityReport.IssueType.InvalidPathLevelMismatch)); + AppendNodeToFix(nodesToRebuild, node); } else if (pathParts[pathParts.Length - 1] != node.NodeId.ToString()) { // invalid path - invalid.Add((node.NodeId, node.ParentId)); + report.Add(node.NodeId, new ContentDataIntegrityReportEntry(ContentDataIntegrityReport.IssueType.InvalidPathById)); + AppendNodeToFix(nodesToRebuild, node); } else if (pathParts[pathParts.Length - 2] != node.ParentId.ToString()) { // invalid path - invalid.Add((node.NodeId, node.ParentId)); + report.Add(node.NodeId, new ContentDataIntegrityReportEntry(ContentDataIntegrityReport.IssueType.InvalidPathByParentId)); + AppendNodeToFix(nodesToRebuild, node); } else { - // it's valid + // it's valid! - if (missingParentIds.TryGetValue(node.NodeId, out var invalidNodes)) - { - // this parent has been flagged as missing which means one or more of it's children was ordered - // wrong and was checked first. So now we can try to rebuild the invalid paths. - - foreach (var invalidNode in invalidNodes) - { - invalidNode.Level = (short) (node.Level + 1); - invalidNode.Path = node.Path + "," + invalidNode.NodeId; - updated.Add(invalidNode); - } - } + // don't track unless we are configured to fix + if (options.FixIssues) + validNodes.Add(node.NodeId, node); } } - foreach (var node in updated) + var updated = new List(); + + if (options.FixIssues) { - Database.Update(node); + // iterate all valid nodes to see if these are parents for invalid nodes + foreach (var (nodeId, node) in validNodes) + { + if (!nodesToRebuild.TryGetValue(nodeId, out var invalidNodes)) continue; + + // now we can try to rebuild the invalid paths. + + foreach (var invalidNode in invalidNodes) + { + invalidNode.Level = (short)(node.Level + 1); + invalidNode.Path = node.Path + "," + invalidNode.NodeId; + updated.Add(invalidNode); + } + } + + foreach (var node in updated) + { + Database.Update(node); + } + } + + return new ContentDataIntegrityReport(report); + + // inline method used to append nodes to rebuild + static void AppendNodeToFix(IDictionary> nodesToRebuild, NodeDto node) + { + if (nodesToRebuild.TryGetValue(node.ParentId, out var childIds)) + childIds.Add(node); + else + nodesToRebuild[node.ParentId] = new List { node }; } } diff --git a/src/Umbraco.Core/Services/IContentServiceBase.cs b/src/Umbraco.Core/Services/IContentServiceBase.cs index 1c04e0b4a3..c40f49347f 100644 --- a/src/Umbraco.Core/Services/IContentServiceBase.cs +++ b/src/Umbraco.Core/Services/IContentServiceBase.cs @@ -1,4 +1,6 @@ -namespace Umbraco.Core.Services +using Umbraco.Core.Models; + +namespace Umbraco.Core.Services { /// /// Placeholder for sharing logic between the content, media (and member) services @@ -6,15 +8,9 @@ /// public interface IContentServiceBase : IService { - /// - /// Checks the data integrity of the node paths/levels stored in the database + /// Checks/fixes the data integrity of node paths/levels stored in the database /// - bool VerifyNodePaths(out int[] invalidIds); - - /// - /// Fixes the data integrity of node paths/levels stored in the database - /// - void FixNodePaths(); + ContentDataIntegrityReport CheckDataIntegrity(ContentDataIntegrityReportOptions options); } } diff --git a/src/Umbraco.Core/Services/Implement/ContentService.cs b/src/Umbraco.Core/Services/Implement/ContentService.cs index f5f3d03abb..1f384d694f 100644 --- a/src/Umbraco.Core/Services/Implement/ContentService.cs +++ b/src/Umbraco.Core/Services/Implement/ContentService.cs @@ -2383,23 +2383,13 @@ namespace Umbraco.Core.Services.Implement return OperationResult.Succeed(evtMsgs); } - public bool VerifyNodePaths(out int[] invalidIds) - { - using (var scope = ScopeProvider.CreateScope(autoComplete: true)) - { - scope.ReadLock(Constants.Locks.ContentTree); - return _documentRepository.VerifyNodePaths(out invalidIds); - } - } - - public void FixNodePaths() + public ContentDataIntegrityReport CheckDataIntegrity(ContentDataIntegrityReportOptions options) { using (var scope = ScopeProvider.CreateScope(autoComplete: true)) { scope.WriteLock(Constants.Locks.ContentTree); - _documentRepository.FixNodePaths(); - // TODO: We're going to have to clear all caches + return _documentRepository.CheckDataIntegrity(options); } } diff --git a/src/Umbraco.Core/Services/Implement/MediaService.cs b/src/Umbraco.Core/Services/Implement/MediaService.cs index 219beb7088..4a2b37ca31 100644 --- a/src/Umbraco.Core/Services/Implement/MediaService.cs +++ b/src/Umbraco.Core/Services/Implement/MediaService.cs @@ -1150,24 +1150,13 @@ namespace Umbraco.Core.Services.Implement } - - public bool VerifyNodePaths(out int[] invalidIds) - { - using (var scope = ScopeProvider.CreateScope(autoComplete: true)) - { - scope.ReadLock(Constants.Locks.MediaTree); - return _mediaRepository.VerifyNodePaths(out invalidIds); - } - } - - public void FixNodePaths() + public ContentDataIntegrityReport CheckDataIntegrity(ContentDataIntegrityReportOptions options) { using (var scope = ScopeProvider.CreateScope(autoComplete: true)) { scope.WriteLock(Constants.Locks.MediaTree); - _mediaRepository.FixNodePaths(); - // TODO: We're going to have to clear all caches + return _mediaRepository.CheckDataIntegrity(options); } } diff --git a/src/Umbraco.Core/Umbraco.Core.csproj b/src/Umbraco.Core/Umbraco.Core.csproj index efb76f2756..f15b098473 100755 --- a/src/Umbraco.Core/Umbraco.Core.csproj +++ b/src/Umbraco.Core/Umbraco.Core.csproj @@ -132,6 +132,9 @@ + + + diff --git a/src/Umbraco.Web/HealthCheck/Checks/Data/DatabaseIntegrityCheck.cs b/src/Umbraco.Web/HealthCheck/Checks/Data/DatabaseIntegrityCheck.cs index d7bf62067f..e40be33fff 100644 --- a/src/Umbraco.Web/HealthCheck/Checks/Data/DatabaseIntegrityCheck.cs +++ b/src/Umbraco.Web/HealthCheck/Checks/Data/DatabaseIntegrityCheck.cs @@ -3,13 +3,15 @@ using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; +using Serilog.Core; +using Umbraco.Core.Models; using Umbraco.Core.Services; namespace Umbraco.Web.HealthCheck.Checks.Data { [HealthCheck( "73DD0C1C-E0CA-4C31-9564-1DCA509788AF", - "Database integrity check", + "Database data integrity check", Description = "Checks for various data integrity issues in the Umbraco database.", Group = "Data Integrity")] public class DatabaseIntegrityCheck : HealthCheck @@ -18,6 +20,8 @@ namespace Umbraco.Web.HealthCheck.Checks.Data private readonly IMediaService _mediaService; private const string _fixMediaPaths = "fixMediaPaths"; private const string _fixContentPaths = "fixContentPaths"; + private const string _fixMediaPathsTitle = "Fix media paths"; + private const string _fixContentPathsTitle = "Fix content paths"; public DatabaseIntegrityCheck(IContentService contentService, IMediaService mediaService) { @@ -34,35 +38,29 @@ namespace Umbraco.Web.HealthCheck.Checks.Data //return the statuses return new[] { - CheckContent(), - CheckMedia() + CheckDocuments(false), + CheckMedia(false) }; } - private HealthCheckStatus CheckMedia() + private HealthCheckStatus CheckMedia(bool fix) { - return CheckPaths(_fixMediaPaths, "Fix media paths", "media", () => - { - var mediaPaths = _mediaService.VerifyNodePaths(out var invalidMediaPaths); - return (mediaPaths, invalidMediaPaths); - }); + return CheckPaths(_fixMediaPaths, _fixMediaPathsTitle, Core.Constants.UdiEntityType.Media, + () => _mediaService.CheckDataIntegrity(new ContentDataIntegrityReportOptions {FixIssues = fix})); } - private HealthCheckStatus CheckContent() + private HealthCheckStatus CheckDocuments(bool fix) { - return CheckPaths(_fixContentPaths, "Fix content paths", "content", () => - { - var contentPaths = _contentService.VerifyNodePaths(out var invalidContentPaths); - return (contentPaths, invalidContentPaths); - }); + return CheckPaths(_fixContentPaths, _fixContentPathsTitle, Core.Constants.UdiEntityType.Document, + () => _contentService.CheckDataIntegrity(new ContentDataIntegrityReportOptions {FixIssues = fix})); } - private HealthCheckStatus CheckPaths(string actionAlias, string actionName, string entityType, Func<(bool success, int[] invalidPaths)> doCheck) + private HealthCheckStatus CheckPaths(string actionAlias, string actionName, string entityType, Func doCheck) { var result = doCheck(); var actions = new List(); - if (!result.success) + if (!result.Ok) { actions.Add(new HealthCheckAction(actionAlias, Id) { @@ -70,28 +68,23 @@ namespace Umbraco.Web.HealthCheck.Checks.Data }); } - return new HealthCheckStatus(result.success + return new HealthCheckStatus(result.Ok ? $"All {entityType} paths are valid" - : $"There are {result.invalidPaths.Length} invalid {entityType} paths") + : $"There are {result.DetectedIssues.Count} invalid {entityType} paths") { - ResultType = result.success ? StatusResultType.Success : StatusResultType.Error, + ResultType = result.Ok ? StatusResultType.Success : StatusResultType.Error, Actions = actions }; } public override HealthCheckStatus ExecuteAction(HealthCheckAction action) { - switch (action.Alias) + return action.Alias switch { - case _fixContentPaths: - _contentService.FixNodePaths(); - return CheckContent(); - case _fixMediaPaths: - _mediaService.FixNodePaths(); - return CheckMedia(); - default: - throw new InvalidOperationException("Action not supported"); - } + _fixContentPaths => CheckDocuments(true), + _fixMediaPaths => CheckMedia(true), + _ => throw new InvalidOperationException("Action not supported") + }; } } } diff --git a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs index 474c390027..e40e7130d0 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/ContentStore.cs @@ -525,7 +525,7 @@ namespace Umbraco.Web.PublishedCache.NuCache // because the data sort operation is by path. if (parent.Value == null) { - _logger.Warn($"Skip item id={kit.Node.Id}, no Data assigned for linked node with path {kit.Node.Path} and parent id {kit.Node.ParentContentId}. This can indicate data corruption for the Path value for node {kit.Node.Id}."); + _logger.Warn($"Skip item id={kit.Node.Id}, no Data assigned for linked node with path {kit.Node.Path} and parent id {kit.Node.ParentContentId}. This can indicate data corruption for the Path value for node {kit.Node.Id}. See the Health Check dashboard in Settings to resolve data integrity issues."); return false; }