2021-09-15 13:40:08 +02:00
using System.Globalization ;
2019-10-21 22:56:02 +11:00
using System.Text.RegularExpressions ;
2025-09-22 11:34:08 +02:00
using Umbraco.Cms.Core.Models.PublishedContent ;
2021-02-18 11:06:02 +01:00
using Umbraco.Cms.Core.Routing ;
2019-10-21 22:56:02 +11:00
2022-06-07 15:28:38 +02:00
namespace Umbraco.Cms.Core.Templates ;
/// <summary>
/// Utility class used to parse internal links.
/// </summary>
/// <remarks>
/// Local links are recognised in two shapes: anchor tags that carry a <c>type</c> attribute of
/// <c>media</c> or <c>document</c> next to a <c>{localLink:guid}</c> href, and legacy hrefs whose
/// <c>{localLink:...}</c> value is either an integer id or a UDI string.
/// </remarks>
public sealed class HtmlLocalLinkParser
{
    // needs to support media and document links, order of attributes should not matter nor should other attributes mess with things
    // <a type="media" href="/{localLink:7e21a725-b905-4c5f-86dc-8c41ec116e39}" title="media">media</a>
    // <a type="document" href="/{localLink:eed5fc6b-96fd-45a5-a0f1-b1adfb483c2f}" title="other page">other page</a>
    internal static readonly Regex LocalLinkTagPattern = new(
        @"<a.+?href=['""](?<locallink>\/?{localLink:(?<guid>[a-fA-F0-9-]+)})[^>]*?>",
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);

    // Extracts the value of the type attribute (media or document) from a matched anchor tag.
    internal static readonly Regex TypePattern = new(
        """type=['"](?<type>(?:media|document))['"]""",
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);

    // Legacy format: the braces may be URL-encoded (%7B / %7D) and the captured value may be an int or a UDI.
    internal static readonly Regex LocalLinkPattern = new(
        @"href=['""](?<locallink>\/?(?:\{|\%7B)localLink:(?<guid>[a-zA-Z0-9-://]+)(?:\}|\%7D))",
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);

    private readonly IPublishedUrlProvider _publishedUrlProvider;

    /// <summary>
    /// Initializes a new instance of the <see cref="HtmlLocalLinkParser"/> class.
    /// </summary>
    /// <param name="publishedUrlProvider">The provider used to resolve document and media URLs.</param>
    public HtmlLocalLinkParser(IPublishedUrlProvider publishedUrlProvider)
    {
        _publishedUrlProvider = publishedUrlProvider;
    }

    /// <summary>
    /// Finds the UDIs of all local links in the supplied text.
    /// </summary>
    /// <param name="text">The text to scan for local links.</param>
    /// <returns>The UDI of every local link that has one; links identified only by an integer id are skipped.</returns>
    public IEnumerable<Udi?> FindUdisFromLocalLinks(string text)
    {
        foreach (LocalLinkTag tagData in FindLocalLinkIds(text))
        {
            if (tagData.Udi is not null)
            {
                yield return tagData.Udi; // In v8, we only care about UDIs
            }
        }
    }

    /// <summary>
    /// Parses the string looking for the {localLink} syntax and updates them to their correct links.
    /// </summary>
    /// <param name="text">The text containing local links.</param>
    /// <param name="preview">Ignored; this overload forwards to the overload without it.</param>
    /// <returns>The text with local links replaced.</returns>
    [Obsolete("This method overload is no longer used in Umbraco and delegates to the overload without the preview parameter. Scheduled for removal in Umbraco 18.")]
    public string EnsureInternalLinks(string text, bool preview) => EnsureInternalLinks(text);

    /// <summary>
    /// Parses the string looking for the {localLink} syntax and updates them to their correct links,
    /// using <see cref="UrlMode.Default"/>.
    /// </summary>
    /// <param name="text">The text containing local links.</param>
    /// <returns>The text with local links replaced.</returns>
    public string EnsureInternalLinks(string text) => EnsureInternalLinks(text, UrlMode.Default);

    /// <summary>
    /// Parses the string looking for the {localLink} syntax and updates them to their correct links.
    /// </summary>
    /// <param name="text">The text containing local links.</param>
    /// <param name="urlMode">The URL mode passed to the URL provider when resolving each link.</param>
    /// <returns>The text with every found local link href replaced by the URL returned by the provider.</returns>
    public string EnsureInternalLinks(string text, UrlMode urlMode)
    {
        foreach (LocalLinkTag tagData in FindLocalLinkIds(text))
        {
            // Binding the UDI to a local gives a non-null reference for the whole branch.
            if (tagData.Udi is GuidUdi udi)
            {
                var newLink = udi.EntityType switch
                {
                    Constants.UdiEntityType.Document => _publishedUrlProvider.GetUrl(udi.Guid, urlMode),
                    Constants.UdiEntityType.Media => _publishedUrlProvider.GetMediaUrl(udi.Guid, urlMode),
                    _ => string.Empty,
                };

                text = StripTypeAttributeFromTag(text, udi.EntityType);
                text = text.Replace(tagData.TagHref, newLink);
            }
            else if (tagData.IntId.HasValue)
            {
                var newLink = _publishedUrlProvider.GetUrl(tagData.IntId.Value, urlMode);
                text = text.Replace(tagData.TagHref, newLink);
            }
        }

        return text;
    }

    // under normal circumstances, the type attribute is preceded by a space
    // to cover the rare occasion where it isn't, we first replace with a space and then without.
    private static string StripTypeAttributeFromTag(string tag, string type) =>
        tag.Replace($" type=\"{type}\"", string.Empty)
            .Replace($"type=\"{type}\"", string.Empty);

    /// <summary>
    /// Finds all local links in the text: first the anchor tags carrying a type attribute,
    /// then the legacy-format links.
    /// </summary>
    /// <param name="text">The text to scan.</param>
    /// <returns>One <see cref="LocalLinkTag"/> per recognised local link.</returns>
    private IEnumerable<LocalLinkTag> FindLocalLinkIds(string text)
    {
        MatchCollection localLinkTagMatches = LocalLinkTagPattern.Matches(text);
        foreach (Match linkTag in localLinkTagMatches)
        {
            // Skip tags whose captured value is not a valid GUID.
            if (Guid.TryParse(linkTag.Groups["guid"].Value, out Guid guid) is false)
            {
                continue;
            }

            // Find the type attribute
            Match typeMatch = TypePattern.Match(linkTag.Value);
            if (typeMatch.Success is false)
            {
                continue;
            }

            yield return new LocalLinkTag(
                null,
                new GuidUdi(typeMatch.Groups["type"].Value, guid),
                linkTag.Groups["locallink"].Value);
        }

        // also return legacy results for values that have not been migrated
        foreach (LocalLinkTag legacyResult in FindLegacyLocalLinkIds(text))
        {
            yield return legacyResult;
        }
    }

    /// <summary>
    /// Finds local links written in the legacy format, where the value inside
    /// <c>{localLink:...}</c> is either an integer id or a UDI string.
    /// </summary>
    /// <param name="text">The text to scan.</param>
    /// <returns>
    /// A <see cref="LocalLinkTag"/> for each legacy link whose value parses as a <see cref="GuidUdi"/>
    /// or as an integer; other values are skipped.
    /// </returns>
    [Obsolete("This is a temporary method to support legacy formats until we are sure all data has been migration. Scheduled for removal in v17")]
    public IEnumerable<LocalLinkTag> FindLegacyLocalLinkIds(string text)
    {
        // Parse internal links
        MatchCollection tags = LocalLinkPattern.Matches(text);
        foreach (Match tag in tags)
        {
            // A Match always exposes at least group 0, so no group-count guard is needed here.
            var id = tag.Groups["guid"].Value;
            var href = tag.Groups["locallink"].Value;

            // The id could be an int or a UDI
            if (UdiParser.TryParse(id, out Udi? udi) && udi is GuidUdi guidUdi)
            {
                yield return new LocalLinkTag(null, guidUdi, href);
            }

            if (int.TryParse(id, NumberStyles.Integer, CultureInfo.InvariantCulture, out var intId))
            {
                yield return new LocalLinkTag(intId, null, href);
            }
        }
    }

    /// <summary>
    /// Describes one local link found in a text: its identifier (integer id or GUID UDI)
    /// and the href value that was matched.
    /// </summary>
    [Obsolete("This is a temporary method to support legacy formats until we are sure all data has been migration. Scheduled for removal in v17")]
    public class LocalLinkTag
    {
        /// <summary>
        /// Initializes a new instance of the <see cref="LocalLinkTag"/> class.
        /// </summary>
        /// <param name="intId">The integer id of the linked item, or <c>null</c> when the link uses a UDI.</param>
        /// <param name="udi">The UDI of the linked item, or <c>null</c> when the link uses an integer id.</param>
        /// <param name="tagHref">The matched href value containing the local link.</param>
        public LocalLinkTag(int? intId, GuidUdi? udi, string tagHref)
        {
            IntId = intId;
            Udi = udi;
            TagHref = tagHref;
        }

        /// <summary>Gets the integer id of the linked item, if any.</summary>
        public int? IntId { get; }

        /// <summary>Gets the UDI of the linked item, if any.</summary>
        public GuidUdi? Udi { get; }

        /// <summary>Gets the matched href value containing the local link.</summary>
        public string TagHref { get; }
    }
}