using System.Globalization;
using System.Text.RegularExpressions;
using Umbraco.Cms.Core.Routing;
using Umbraco.Cms.Core.Web;

namespace Umbraco.Cms.Core.Templates;

/// <summary>
///     Utility class used to parse internal links.
/// </summary>
public sealed class HtmlLocalLinkParser
{
    // needs to support media and document links, order of attributes should not matter nor should other attributes mess with things
    // <a type="media" href="/{localLink:7e21a725-b905-4c5f-86dc-8c41ec116e39}" title="media">media</a>
    // <a type="document" href="/{localLink:eed5fc6b-96fd-45a5-a0f1-b1adfb483c2f}" title="other page">other page</a>
    //
    // Named groups consumed by FindLocalLinkIds:
    //   type      - "document" or "media"
    //   locallink - the href attribute up to and including the closing brace (the closing quote is NOT captured)
    //   guid      - the key inside {localLink:...}; not captured by the last alternative, so such matches are skipped
    internal static readonly Regex LocalLinkTagPattern = new(
        @"<a.+?(?:(?:(?:type=['""](?<type>document|media)['""].*?(?<locallink>href=[""']/{localLink:(?<guid>[a-fA-F0-9-]+)})[""'])|((?<locallink>href=[""']/{localLink:(?<guid>[a-fA-F0-9-]+)})[""'].*?type=(['""])(?<type>document|media)(?:['""])))|(?:(?:type=['""](?<type>document|media)['""])|(?:(?<locallink>href=[""']/{localLink:[a-fA-F0-9-]+})[""'])))[^>]*>",
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);

    // Legacy format: href="/{localLink:1234}" or href="/{localLink:umb://document/...}" (braces may be URL-encoded).
    // Group 1 holds the raw id, which is either an integer or a UDI string.
    internal static readonly Regex LocalLinkPattern = new(
        @"href=""[/]?(?:\{|\%7B)localLink:([a-zA-Z0-9-://]+)(?:\}|\%7D)",
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);

    private readonly IPublishedUrlProvider _publishedUrlProvider;
    private readonly IUmbracoContextAccessor _umbracoContextAccessor;

    /// <summary>
    ///     Initializes a new instance of the <see cref="HtmlLocalLinkParser" /> class.
    /// </summary>
    /// <param name="umbracoContextAccessor">Accessor used to verify (and, for preview, obtain) the current Umbraco context.</param>
    /// <param name="publishedUrlProvider">Provider used to resolve document and media URLs.</param>
    public HtmlLocalLinkParser(
        IUmbracoContextAccessor umbracoContextAccessor,
        IPublishedUrlProvider publishedUrlProvider)
    {
        _umbracoContextAccessor = umbracoContextAccessor;
        _publishedUrlProvider = publishedUrlProvider;
    }

    /// <summary>
    ///     Finds the UDIs of all local links in the supplied text.
    /// </summary>
    /// <param name="text">The markup to scan.</param>
    /// <returns>The UDI of every local link that carries one; integer-only legacy links are omitted.</returns>
    public IEnumerable<Udi?> FindUdisFromLocalLinks(string text)
    {
        foreach (LocalLinkTag tagData in FindLocalLinkIds(text))
        {
            if (tagData.Udi is not null)
            {
                yield return tagData.Udi; // In v8, we only care about UDIs
            }
        }
    }

    /// <summary>
    ///     Parses the string looking for the {localLink} syntax and updates them to their correct links.
    /// </summary>
    /// <param name="text">The markup to process.</param>
    /// <param name="preview">When <c>true</c>, the Umbraco context is forced into preview while URLs are resolved.</param>
    /// <returns>The text with local links replaced by resolved URLs.</returns>
    /// <exception cref="InvalidOperationException">There is no current Umbraco context.</exception>
    public string EnsureInternalLinks(string text, bool preview)
    {
        if (!_umbracoContextAccessor.TryGetUmbracoContext(out IUmbracoContext? umbracoContext))
        {
            throw new InvalidOperationException("Could not parse internal links, there is no current UmbracoContext");
        }

        if (!preview)
        {
            return EnsureInternalLinks(text);
        }

        // force for URL provider
        using (umbracoContext.ForcedPreview(preview))
        {
            return EnsureInternalLinks(text);
        }
    }

    /// <summary>
    ///     Parses the string looking for the {localLink} syntax and updates them to their correct links.
    /// </summary>
    /// <param name="text">The markup to process.</param>
    /// <returns>The text with local links replaced by resolved URLs.</returns>
    /// <exception cref="InvalidOperationException">There is no current Umbraco context.</exception>
    public string EnsureInternalLinks(string text)
    {
        if (!_umbracoContextAccessor.TryGetUmbracoContext(out _))
        {
            throw new InvalidOperationException("Could not parse internal links, there is no current UmbracoContext");
        }

        foreach (LocalLinkTag tagData in FindLocalLinkIds(text))
        {
            if (tagData.Udi is { } udi)
            {
                // Entity types other than document/media fall back to "#".
                var newLink = udi.EntityType switch
                {
                    Constants.UdiEntityType.Document => _publishedUrlProvider.GetUrl(udi.Guid),
                    Constants.UdiEntityType.Media => _publishedUrlProvider.GetMediaUrl(udi.Guid),
                    _ => "#",
                };

                // NOTE: the type attribute is stripped from the whole text, not only from the matched tag.
                text = StripTypeAttributeFromTag(text, udi.EntityType);
                text = text.Replace(tagData.TagHref, BuildHrefStart(tagData.TagHref, newLink));
            }
            else if (tagData.IntId.HasValue)
            {
                var newLink = _publishedUrlProvider.GetUrl(tagData.IntId.Value);
                text = text.Replace(tagData.TagHref, BuildHrefStart(tagData.TagHref, newLink));
            }
        }

        return text;
    }

    // Builds the replacement for a matched href fragment. The closing quote of the attribute is not part of
    // the matched fragment and stays in the text, so the opening quote must be the same character the
    // original used; otherwise href='...' would be rewritten to the mismatched href="...'.
    private static string BuildHrefStart(string tagHref, string url)
    {
        // Both patterns match "href=" (5 chars) immediately followed by the opening quote.
        var quote = tagHref.Length > 5 && tagHref[5] == '\'' ? '\'' : '"';
        return $"href={quote}{url}";
    }

    // under normal circumstances, the type attribute is preceded by a space
    // to cover the rare occasion where it isn't, we first replace with a space and then without.
    private static string StripTypeAttributeFromTag(string tag, string type) =>
        tag.Replace($" type=\"{type}\"", string.Empty)
            .Replace($"type=\"{type}\"", string.Empty);

    // Yields one entry per anchor tag whose localLink key parses as a GUID, followed by all legacy matches.
    private IEnumerable<LocalLinkTag> FindLocalLinkIds(string text)
    {
        foreach (Match linkTag in LocalLinkTagPattern.Matches(text))
        {
            // The "guid" group is empty when the tag matched an alternative that does not capture it.
            if (!Guid.TryParse(linkTag.Groups["guid"].Value, out Guid guid))
            {
                continue;
            }

            yield return new LocalLinkTag(
                null,
                new GuidUdi(linkTag.Groups["type"].Value, guid),
                linkTag.Groups["locallink"].Value,
                linkTag.Value);
        }

        // also return legacy results for values that have not been migrated
        foreach (LocalLinkTag legacyResult in FindLegacyLocalLinkIds(text))
        {
            yield return legacyResult;
        }
    }

    // todo remove at some point?
    private IEnumerable<LocalLinkTag> FindLegacyLocalLinkIds(string text)
    {
        // Parse internal links
        foreach (Match tag in LocalLinkPattern.Matches(text))
        {
            // The id could be an int or a UDI
            var id = tag.Groups[1].Value;

            if (UdiParser.TryParse(id, out Udi? udi))
            {
                if (udi is GuidUdi guidUdi)
                {
                    yield return new LocalLinkTag(null, guidUdi, tag.Value, null);
                }

                // A value that parsed as a UDI is not an integer id; skip the integer attempt.
                continue;
            }

            if (int.TryParse(id, NumberStyles.Integer, CultureInfo.InvariantCulture, out var intId))
            {
                yield return new LocalLinkTag(intId, null, tag.Value, null);
            }
        }
    }

    // Describes one local link found in the text: its identifier (integer id or UDI), the href fragment
    // to replace, and optionally the full anchor tag it was found in.
    private sealed class LocalLinkTag
    {
        public LocalLinkTag(int? intId, GuidUdi? udi, string tagHref)
            : this(intId, udi, tagHref, null)
        {
        }

        public LocalLinkTag(int? intId, GuidUdi? udi, string tagHref, string? fullTag)
        {
            IntId = intId;
            Udi = udi;
            TagHref = tagHref;
            FullTag = fullTag;
        }

        public int? IntId { get; }

        public GuidUdi? Udi { get; }

        public string TagHref { get; }

        public string? FullTag { get; }
    }
}