using System.Globalization;
using System.Text.RegularExpressions;
using Umbraco.Cms.Core.Routing;
using Umbraco.Cms.Core.Web;

namespace Umbraco.Cms.Core.Templates;

/// <summary>
///     Utility class used to parse internal links.
/// </summary>
/// <remarks>
///     Two link formats are recognised: anchor tags carrying a <c>type</c> attribute and a
///     <c>{localLink:guid}</c> href, and legacy hrefs whose <c>{localLink:...}</c> value is either an
///     integer id or a UDI string.
/// </remarks>
public sealed class HtmlLocalLinkParser
{
    // needs to support media and document links, order of attributes should not matter nor should other attributes mess with things
    // <a type="media" href="/{localLink:7e21a725-b905-4c5f-86dc-8c41ec116e39}" title="media">media</a>
    // <a type="document" href="/{localLink:eed5fc6b-96fd-45a5-a0f1-b1adfb483c2f}" title="other page">other page</a>
    // Groups: "locallink" = the href value that gets replaced, "guid" = the key inside the braces.
    internal static readonly Regex LocalLinkTagPattern = new(
        @"<a.+?href=['""](?<locallink>\/?{localLink:(?<guid>[a-fA-F0-9-]+)})[^>]*?>",
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);

    // Groups: "type" = the entity type named by the tag's type attribute (media or document).
    internal static readonly Regex TypePattern = new(
        """type=['"](?<type>(?:media|document))['"]""",
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);

    // Legacy format; the braces may be URL-encoded (%7B / %7D).
    // Groups: "locallink" = the href value that gets replaced, "guid" = an integer id or a UDI string.
    internal static readonly Regex LocalLinkPattern = new(
        @"href=['""](?<locallink>\/?(?:\{|\%7B)localLink:(?<guid>[a-zA-Z0-9-://]+)(?:\}|\%7D))",
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);

    private readonly IPublishedUrlProvider _publishedUrlProvider;

    /// <summary>
    ///     Initializes a new instance of the <see cref="HtmlLocalLinkParser" /> class.
    /// </summary>
    /// <param name="publishedUrlProvider">The provider used to resolve document and media URLs.</param>
    public HtmlLocalLinkParser(IPublishedUrlProvider publishedUrlProvider)
    {
        _publishedUrlProvider = publishedUrlProvider;
    }

    /// <summary>
    ///     Finds the UDIs referenced by local links in the given text.
    /// </summary>
    /// <param name="text">The markup to scan.</param>
    /// <returns>The UDI of every local link that carries one; links identified only by an integer id are skipped.</returns>
    public IEnumerable<Udi?> FindUdisFromLocalLinks(string text)
    {
        foreach (LocalLinkTag tagData in FindLocalLinkIds(text))
        {
            if (tagData.Udi is not null)
            {
                yield return tagData.Udi; // In v8, we only care about UDIs
            }
        }
    }

    /// <summary>
    ///     Parses the string looking for the {localLink} syntax and updates them to their correct links.
    /// </summary>
    /// <param name="text">The markup to process.</param>
    /// <param name="preview">Not used; this overload forwards to <see cref="EnsureInternalLinks(string)" />.</param>
    /// <returns>The markup with local links replaced.</returns>
    public string EnsureInternalLinks(string text, bool preview) => EnsureInternalLinks(text);

    /// <summary>
    ///     Parses the string looking for the {localLink} syntax and updates them to their correct links.
    /// </summary>
    /// <param name="text">The markup to process.</param>
    /// <returns>
    ///     The markup with every recognised local link href replaced by the URL obtained from the
    ///     published URL provider, and the matching <c>type</c> attribute removed.
    /// </returns>
    public string EnsureInternalLinks(string text)
    {
        foreach (LocalLinkTag tagData in FindLocalLinkIds(text))
        {
            if (tagData.Udi is { } udi)
            {
                // Entity types other than document/media resolve to an empty href.
                var newLink = udi.EntityType switch
                {
                    Constants.UdiEntityType.Document => _publishedUrlProvider.GetUrl(udi.Guid),
                    Constants.UdiEntityType.Media => _publishedUrlProvider.GetMediaUrl(udi.Guid),
                    _ => string.Empty,
                };

                text = StripTypeAttributeFromTag(text, udi.EntityType);
                text = text.Replace(tagData.TagHref, newLink);
            }
            else if (tagData.IntId is { } intId)
            {
                var newLink = _publishedUrlProvider.GetUrl(intId);
                text = text.Replace(tagData.TagHref, newLink);
            }
        }

        return text;
    }

    // under normal circumstances, the type attribute is preceded by a space
    // to cover the rare occasion where it isn't, we first replace with a space and then without.
    // TypePattern accepts both quote styles, so both are stripped here as well.
    // NOTE(review): the replacement runs over the whole text handed in, not just the matched anchor tag.
    private static string StripTypeAttributeFromTag(string tag, string type) =>
        tag.Replace($" type=\"{type}\"", string.Empty)
            .Replace($"type=\"{type}\"", string.Empty)
            .Replace($" type='{type}'", string.Empty)
            .Replace($"type='{type}'", string.Empty);

    // Yields the anchor tags that carry both a parsable guid and a type attribute,
    // followed by whatever the legacy pattern finds.
    private IEnumerable<LocalLinkTag> FindLocalLinkIds(string text)
    {
        MatchCollection localLinkTagMatches = LocalLinkTagPattern.Matches(text);
        foreach (Match linkTag in localLinkTagMatches)
        {
            if (Guid.TryParse(linkTag.Groups["guid"].Value, out Guid guid) is false)
            {
                continue;
            }

            // Find the type attribute
            Match typeMatch = TypePattern.Match(linkTag.Value);
            if (typeMatch.Success is false)
            {
                continue;
            }

            yield return new LocalLinkTag(
                null,
                new GuidUdi(typeMatch.Groups["type"].Value, guid),
                linkTag.Groups["locallink"].Value);
        }

        // also return legacy results for values that have not been migrated
        foreach (LocalLinkTag legacyResult in FindLegacyLocalLinkIds(text))
        {
            yield return legacyResult;
        }
    }

    // todo remove at some point?
    // Yields links in the legacy format, where the value after "localLink:" is an integer id or a UDI.
    private IEnumerable<LocalLinkTag> FindLegacyLocalLinkIds(string text)
    {
        // Parse internal links
        MatchCollection tags = LocalLinkPattern.Matches(text);
        foreach (Match tag in tags)
        {
            var id = tag.Groups["guid"].Value;

            // The id could be an int or a UDI
            if (UdiParser.TryParse(id, out Udi? udi))
            {
                if (udi is GuidUdi guidUdi)
                {
                    yield return new LocalLinkTag(null, guidUdi, tag.Groups["locallink"].Value);
                }
            }

            if (int.TryParse(id, NumberStyles.Integer, CultureInfo.InvariantCulture, out var intId))
            {
                yield return new LocalLinkTag(intId, null, tag.Groups["locallink"].Value);
            }
        }
    }

    // One local link found in the text: its identifier (integer id or UDI) and the href value to replace.
    private sealed class LocalLinkTag
    {
        public LocalLinkTag(int? intId, GuidUdi? udi, string tagHref)
        {
            IntId = intId;
            Udi = udi;
            TagHref = tagHref;
        }

        public int? IntId { get; }

        public GuidUdi? Udi { get; }

        public string TagHref { get; }
    }
}