Migrations: Optimize ConvertLocalLinks migration to process data in pages, to avoid having to load all property data into memory (#21003)

* Optimize ConvertLocalLinks migration to process data in pages, to avoid having to load all property data into memory.

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Updated obsoletion warning.

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
(cherry picked from commit 742de79f46)
This commit is contained in:
Andy Butland
2025-12-02 02:09:54 +01:00
committed by Zeegaan
parent 1694e3bad2
commit f408d2a1b3

View File

@@ -17,6 +17,13 @@ using Umbraco.Extensions;
namespace Umbraco.Cms.Infrastructure.Migrations.Upgrade.V_15_0_0; namespace Umbraco.Cms.Infrastructure.Migrations.Upgrade.V_15_0_0;
/// <summary>
/// Migrates local links in content and media properties from the legacy format using UDIs
/// to the new one with GUIDs.
/// </summary>
/// <remarks>
/// See: https://github.com/umbraco/Umbraco-CMS/pull/17307.
/// </remarks>
public class ConvertLocalLinks : MigrationBase public class ConvertLocalLinks : MigrationBase
{ {
private readonly IUmbracoContextFactory _umbracoContextFactory; private readonly IUmbracoContextFactory _umbracoContextFactory;
@@ -30,7 +37,9 @@ public class ConvertLocalLinks : MigrationBase
private readonly ICoreScopeProvider _coreScopeProvider; private readonly ICoreScopeProvider _coreScopeProvider;
private readonly LocalLinkMigrationTracker _linkMigrationTracker; private readonly LocalLinkMigrationTracker _linkMigrationTracker;
[Obsolete("Use non obsoleted contructor instead")] /// <summary>
/// Initializes a new instance of the <see cref="ConvertLocalLinks"/> class.
/// </summary>
public ConvertLocalLinks( public ConvertLocalLinks(
IMigrationContext context, IMigrationContext context,
IUmbracoContextFactory umbracoContextFactory, IUmbracoContextFactory umbracoContextFactory,
@@ -57,6 +66,10 @@ public class ConvertLocalLinks : MigrationBase
_linkMigrationTracker = linkMigrationTracker; _linkMigrationTracker = linkMigrationTracker;
} }
/// <summary>
/// Initializes a new instance of the <see cref="ConvertLocalLinks"/> class.
/// </summary>
[Obsolete("Please use the constructor taking all parameters. Scheduled for removal along with all other migrations to 17 in Umbraco 18.")]
public ConvertLocalLinks( public ConvertLocalLinks(
IMigrationContext context, IMigrationContext context,
IUmbracoContextFactory umbracoContextFactory, IUmbracoContextFactory umbracoContextFactory,
@@ -83,6 +96,7 @@ public class ConvertLocalLinks : MigrationBase
{ {
} }
/// <inheritdoc/>
protected override void Migrate() protected override void Migrate()
{ {
IEnumerable<string> propertyEditorAliases = _localLinkProcessor.GetSupportedPropertyEditorAliases(); IEnumerable<string> propertyEditorAliases = _localLinkProcessor.GetSupportedPropertyEditorAliases();
@@ -116,7 +130,7 @@ public class ConvertLocalLinks : MigrationBase
_logger.LogInformation( _logger.LogInformation(
"Migration starting for all properties of type: {propertyEditorAlias}", "Migration starting for all properties of type: {propertyEditorAlias}",
propertyEditorAlias); propertyEditorAlias);
if (ProcessPropertyTypes(propertyTypes, languagesById)) if (ProcessPropertyTypes(propertyEditorAlias, propertyTypes, languagesById))
{ {
_logger.LogInformation( _logger.LogInformation(
"Migration succeeded for all properties of type: {propertyEditorAlias}", "Migration succeeded for all properties of type: {propertyEditorAlias}",
@@ -134,7 +148,7 @@ public class ConvertLocalLinks : MigrationBase
RebuildCache = true; RebuildCache = true;
} }
private bool ProcessPropertyTypes(IPropertyType[] propertyTypes, IDictionary<int, ILanguage> languagesById) private bool ProcessPropertyTypes(string propertyEditorAlias, IPropertyType[] propertyTypes, IDictionary<int, ILanguage> languagesById)
{ {
foreach (IPropertyType propertyType in propertyTypes) foreach (IPropertyType propertyType in propertyTypes)
{ {
@@ -145,112 +159,157 @@ public class ConvertLocalLinks : MigrationBase
?? throw new InvalidOperationException( ?? throw new InvalidOperationException(
"The data type value editor could not be fetched."); "The data type value editor could not be fetched.");
Sql<ISqlContext> sql = Sql() long propertyDataCount = Database.ExecuteScalar<long>(BuildPropertyDataSql(propertyType, true));
.Select<PropertyDataDto>() if (propertyDataCount == 0)
.From<PropertyDataDto>()
.InnerJoin<ContentVersionDto>()
.On<PropertyDataDto, ContentVersionDto>((propertyData, contentVersion) =>
propertyData.VersionId == contentVersion.Id)
.LeftJoin<DocumentVersionDto>()
.On<ContentVersionDto, DocumentVersionDto>((contentVersion, documentVersion) =>
contentVersion.Id == documentVersion.Id)
.Where<PropertyDataDto, ContentVersionDto, DocumentVersionDto>(
(propertyData, contentVersion, documentVersion) =>
(contentVersion.Current == true || documentVersion.Published == true)
&& propertyData.PropertyTypeId == propertyType.Id);
List<PropertyDataDto> propertyDataDtos = Database.Fetch<PropertyDataDto>(sql);
if (propertyDataDtos.Count < 1)
{ {
continue; continue;
} }
var updateBatch = propertyDataDtos.Select(propertyDataDto => _logger.LogInformation(
UpdateBatch.For(propertyDataDto, Database.StartSnapshot(propertyDataDto))).ToList(); "Migrating {PropertyDataCount} property data values for property {PropertyTypeAlias} ({PropertyTypeKey}) with property editor alias {PropertyEditorAlias}",
propertyDataCount,
propertyType.Alias,
propertyType.Key,
propertyEditorAlias);
var updatesToSkip = new ConcurrentBag<UpdateBatch<PropertyDataDto>>(); // Process in pages to avoid loading all property data from the database into memory at once.
Sql<ISqlContext> sql = BuildPropertyDataSql(propertyType);
var progress = 0; const int PageSize = 10000;
long pageNumber = 1;
void HandleUpdateBatch(UpdateBatch<PropertyDataDto> update) long pageCount = (propertyDataCount + PageSize - 1) / PageSize;
int processedCount = 0;
while (processedCount < propertyDataCount)
{ {
using UmbracoContextReference umbracoContextReference = _umbracoContextFactory.EnsureUmbracoContext(); Page<PropertyDataDto> propertyDataDtoPage = Database.Page<PropertyDataDto>(pageNumber, PageSize, sql);
if (propertyDataDtoPage.Items.Count == 0)
progress++;
if (progress % 100 == 0)
{ {
_logger.LogInformation(" - finíshed {progress} of {total} properties", progress, break;
updateBatch.Count);
} }
PropertyDataDto propertyDataDto = update.Poco; var updateBatchCollection = propertyDataDtoPage.Items
.Select(propertyDataDto =>
UpdateBatch.For(propertyDataDto, Database.StartSnapshot(propertyDataDto)))
.ToList();
if (ProcessPropertyDataDto(propertyDataDto, propertyType, languagesById, valueEditor) == false) var updatesToSkip = new ConcurrentBag<UpdateBatch<PropertyDataDto>>();
{
updatesToSkip.Add(update);
}
}
if (DatabaseType == DatabaseType.SQLite) var progress = 0;
{
// SQLite locks up if we run the migration in parallel, so... let's not. void HandleUpdateBatch(UpdateBatch<PropertyDataDto> update)
foreach (UpdateBatch<PropertyDataDto> update in updateBatch)
{ {
HandleUpdateBatch(update); using UmbracoContextReference umbracoContextReference = _umbracoContextFactory.EnsureUmbracoContext();
}
} progress++;
else if (progress % 100 == 0)
{
Parallel.ForEachAsync(updateBatch, async (update, token) =>
{
//Foreach here, but we need to suppress the flow before each task, but not the actuall await of the task
Task task;
using (ExecutionContext.SuppressFlow())
{ {
task = Task.Run( _logger.LogInformation(
() => " - finished {Progress} of {PageTotal} properties in page {PageNumber} of {PageCount}",
{ progress,
using ICoreScope scope = _coreScopeProvider.CreateCoreScope(); updateBatchCollection.Count,
scope.Complete(); pageNumber,
HandleUpdateBatch(update); pageCount);
},
token);
} }
await task; PropertyDataDto propertyDataDto = update.Poco;
}).GetAwaiter().GetResult();
if (ProcessPropertyDataDto(propertyDataDto, propertyType, languagesById, valueEditor) == false)
{
updatesToSkip.Add(update);
}
}
if (DatabaseType == DatabaseType.SQLite)
{
// SQLite locks up if we run the migration in parallel, so... let's not.
foreach (UpdateBatch<PropertyDataDto> update in updateBatchCollection)
{
HandleUpdateBatch(update);
}
}
else
{
Parallel.ForEachAsync(updateBatchCollection, async (update, token) =>
{
//Foreach here, but we need to suppress the flow before each task, but not the actual await of the task
Task task;
using (ExecutionContext.SuppressFlow())
{
task = Task.Run(
() =>
{
using ICoreScope scope = _coreScopeProvider.CreateCoreScope();
scope.Complete();
HandleUpdateBatch(update);
},
token);
}
await task;
}).GetAwaiter().GetResult();
}
updateBatchCollection.RemoveAll(updatesToSkip.Contains);
if (updateBatchCollection.Any() is false)
{
_logger.LogDebug(" - no properties to convert, continuing");
pageNumber++;
processedCount += propertyDataDtoPage.Items.Count;
continue;
}
_logger.LogInformation(" - {totalConverted} properties converted, saving...", updateBatchCollection.Count);
var result = Database.UpdateBatch(updateBatchCollection, new BatchOptions { BatchSize = 100 });
if (result != updateBatchCollection.Count)
{
throw new InvalidOperationException(
$"The database batch update was supposed to update {updateBatchCollection.Count} property DTO entries, but it updated {result} entries.");
}
_logger.LogDebug(
"Migration completed for property type: {propertyTypeName} (id: {propertyTypeId}, alias: {propertyTypeAlias}, editor alias: {propertyTypeEditorAlias}) - {updateCount} property DTO entries updated.",
propertyType.Name,
propertyType.Id,
propertyType.Alias,
propertyType.PropertyEditorAlias,
result);
pageNumber++;
processedCount += propertyDataDtoPage.Items.Count;
} }
updateBatch.RemoveAll(updatesToSkip.Contains);
if (updateBatch.Any() is false)
{
_logger.LogDebug(" - no properties to convert, continuing");
continue;
}
_logger.LogInformation(" - {totalConverted} properties converted, saving...", updateBatch.Count);
var result = Database.UpdateBatch(updateBatch, new BatchOptions { BatchSize = 100 });
if (result != updateBatch.Count)
{
throw new InvalidOperationException(
$"The database batch update was supposed to update {updateBatch.Count} property DTO entries, but it updated {result} entries.");
}
_logger.LogDebug(
"Migration completed for property type: {propertyTypeName} (id: {propertyTypeId}, alias: {propertyTypeAlias}, editor alias: {propertyTypeEditorAlias}) - {updateCount} property DTO entries updated.",
propertyType.Name,
propertyType.Id,
propertyType.Alias,
propertyType.PropertyEditorAlias,
result);
} }
return true; return true;
} }
private bool ProcessPropertyDataDto(PropertyDataDto propertyDataDto, IPropertyType propertyType, private Sql<ISqlContext> BuildPropertyDataSql(IPropertyType propertyType, bool isCount = false)
IDictionary<int, ILanguage> languagesById, IDataValueEditor valueEditor) {
Sql<ISqlContext> sql = isCount
? Sql().SelectCount()
: Sql().Select<PropertyDataDto>();
sql = sql.From<PropertyDataDto>()
.InnerJoin<ContentVersionDto>()
.On<PropertyDataDto, ContentVersionDto>((propertyData, contentVersion) =>
propertyData.VersionId == contentVersion.Id)
.LeftJoin<DocumentVersionDto>()
.On<ContentVersionDto, DocumentVersionDto>((contentVersion, documentVersion) =>
contentVersion.Id == documentVersion.Id)
.Where<PropertyDataDto, ContentVersionDto, DocumentVersionDto>(
(propertyData, contentVersion, documentVersion) =>
(contentVersion.Current || documentVersion.Published)
&& propertyData.PropertyTypeId == propertyType.Id);
return sql;
}
private bool ProcessPropertyDataDto(
PropertyDataDto propertyDataDto,
IPropertyType propertyType,
IDictionary<int, ILanguage> languagesById,
IDataValueEditor valueEditor)
{ {
// NOTE: some old property data DTOs can have variance defined, even if the property type no longer varies // NOTE: some old property data DTOs can have variance defined, even if the property type no longer varies
var culture = propertyType.VariesByCulture() var culture = propertyType.VariesByCulture()