2020-01-29 09:53:08 +02:00
|
|
|
|
using System;
|
|
|
|
|
|
using System.Collections.Generic;
|
2019-10-21 23:53:14 +11:00
|
|
|
|
using System.Text.RegularExpressions;
|
2021-02-18 11:06:02 +01:00
|
|
|
|
using Umbraco.Cms.Core.Routing;
|
|
|
|
|
|
using Umbraco.Extensions;
|
2019-10-21 23:53:14 +11:00
|
|
|
|
|
2021-02-18 11:06:02 +01:00
|
|
|
|
namespace Umbraco.Cms.Core.Templates
|
2019-10-21 23:53:14 +11:00
|
|
|
|
{
|
2019-10-22 11:09:21 +11:00
|
|
|
|
|
2019-10-23 14:55:18 +11:00
|
|
|
|
public sealed class HtmlImageSourceParser
|
2019-10-21 23:53:14 +11:00
|
|
|
|
{
|
2020-01-29 09:53:08 +02:00
|
|
|
|
/// <summary>
/// Initializes a new instance of the <see cref="HtmlImageSourceParser"/> class with a delegate that resolves media URLs.
/// </summary>
/// <param name="getMediaUrl">
/// Delegate that is given the key of a media item and returns its URL; it is stored as-is
/// and invoked by <see cref="EnsureImageSources"/>.
/// </param>
public HtmlImageSourceParser(Func<Guid, string> getMediaUrl) => _getMediaUrl = getMediaUrl;
|
|
|
|
|
|
|
2020-02-14 13:04:49 +01:00
|
|
|
|
// URL provider that EnsureImageSources falls back to for media URL lookups when no
// lookup delegate has been set; only assigned by the IPublishedUrlProvider constructor.
private readonly IPublishedUrlProvider _publishedUrlProvider;
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Initializes a new instance of the <see cref="HtmlImageSourceParser"/> class that resolves media URLs through a URL provider.
/// </summary>
/// <param name="publishedUrlProvider">
/// The URL provider that <see cref="EnsureImageSources"/> queries for media URLs
/// when no lookup delegate has been set.
/// </param>
public HtmlImageSourceParser(IPublishedUrlProvider publishedUrlProvider) => _publishedUrlProvider = publishedUrlProvider;
|
|
|
|
|
|
|
2020-01-29 09:53:08 +02:00
|
|
|
|
// Matches an <img> tag that carries a src attribute followed (later in the same tag) by a
// data-udi attribute, both double-quoted. Capture groups:
//   1 = from the start of the tag up to and including the opening quote of src
//   2 = the src value without its query string
//   3 = the optional query string, the closing quote of src and everything up to and
//       including the opening quote of data-udi
//   4 = the data-udi value
//   5 = the closing quote of data-udi and the rest of the tag
// The pattern contains no whitespace or '#', so IgnorePatternWhitespace does not alter it.
private static readonly Regex ResolveImgPattern = new Regex(
    @"(<img[^>]*src="")([^""\?]*)((?:\?[^""]*)?""[^>]*data-udi="")([^""]*)(""[^>]*>)",
    RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
|
|
|
2019-10-22 15:48:47 +11:00
|
|
|
|
// Matches a data-udi attribute whose value has the form umb://<segment>/<segment>. The value
// may be wrapped in single or double quotes, and each quote may be preceded by a backslash
// (i.e. an escaped quote). The attribute value is captured in the named group "udi".
// NOTE(review): the A-z range is wider than A-Za-z (it also spans the ASCII punctuation
// between 'Z' and 'a') - confirm whether that is intended.
private static readonly Regex DataUdiAttributeRegex = new Regex(
    @"data-udi=\\?(?:""|')(?<udi>umb://[A-z0-9\-]+/[A-z0-9]+)\\?(?:""|')",
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);
|
|
|
|
|
|
|
2020-02-06 15:10:05 +01:00
|
|
|
|
// Media URL lookup (media key -> URL). Either supplied through the constructor or assigned
// on first use in EnsureImageSources, which is why the field is not readonly.
private Func<Guid, string> _getMediaUrl;
|
2020-01-29 09:53:08 +02:00
|
|
|
|
|
2019-10-22 15:48:47 +11:00
|
|
|
|
/// <summary>
/// Parses out media UDIs from an html string based on 'data-udi' html attributes
/// </summary>
/// <param name="text">The html to scan for 'data-udi' attributes.</param>
/// <returns>
/// The UDIs that could be parsed from the matched attribute values, in the order the
/// attributes occur in <paramref name="text"/>. Values that fail to parse are skipped.
/// </returns>
/// <remarks>This is an iterator, so the text is only scanned once the result is enumerated.</remarks>
public IEnumerable<Udi> FindUdisFromDataAttributes(string text)
{
    MatchCollection attributeMatches = DataUdiAttributeRegex.Matches(text);
    if (attributeMatches.Count > 0)
    {
        foreach (Match attributeMatch in attributeMatches)
        {
            // a usable match consists of exactly the whole match plus the captured udi value
            if (attributeMatch.Groups.Count != 2)
            {
                continue;
            }

            if (UdiParser.TryParse(attributeMatch.Groups[1].Value, out var parsedUdi))
            {
                yield return parsedUdi;
            }
        }
    }
}
|
|
|
|
|
|
|
2019-10-21 23:53:14 +11:00
|
|
|
|
/// <summary>
/// Parses the string looking for Umbraco image tags and updates them to their up-to-date image sources.
/// </summary>
/// <param name="text">The markup to scan for image tags.</param>
/// <returns>
/// The markup with the src path of every resolvable image tag replaced by the current media URL.
/// Tags whose data-udi value cannot be parsed, or whose media URL resolves to null, are returned unchanged.
/// </returns>
/// <remarks>Umbraco image tags are identified by their data-udi attributes</remarks>
public string EnsureImageSources(string text)
{
    // no lookup delegate has been set yet: fall back to the URL provider
    if (_getMediaUrl is null)
    {
        _getMediaUrl = key => _publishedUrlProvider.GetMediaUrl(key);
    }

    return ResolveImgPattern.Replace(text, RewriteSource);

    // Builds the replacement for a single matched image tag.
    string RewriteSource(Match imageTag)
    {
        // match groups:
        // - 1 = from the beginning of the image tag until src attribute value begins
        // - 2 = the src attribute value excluding the querystring (if present)
        // - 3 = anything after group 2 and before the data-udi attribute value begins
        // - 4 = the data-udi attribute value
        // - 5 = anything after group 4 until the image tag is closed
        GroupCollection parts = imageTag.Groups;
        string udiValue = parts[4].Value;

        if (udiValue.IsNullOrWhiteSpace() || !UdiParser.TryParse<GuidUdi>(udiValue, out var mediaUdi))
        {
            return imageTag.Value;
        }

        string resolvedUrl = _getMediaUrl(mediaUdi.Guid);

        // When the image does not exist the tag is kept untouched rather than removed (which
        // returning an empty string would do): removing it would leave the editors completely
        // in the dark as to why the image doesn't show.
        return resolvedUrl == null
            ? imageTag.Value
            : string.Concat(parts[1].Value, resolvedUrl, parts[3].Value, udiValue, parts[5].Value);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Removes media URLs from &lt;img&gt; tags where a data-udi attribute is present
/// </summary>
/// <param name="text">The markup to strip image sources from.</param>
/// <returns>
/// The markup with the src path of each matching image tag emptied; a query string on the
/// src, the data-udi attribute and the rest of the tag are kept.
/// </returns>
public string RemoveImageSources(string text)
{
    // Re-emit every capture group of ResolveImgPattern except group 2, which holds the src
    // value without its query string:
    // - 1 = from the beginning of the image tag until src attribute value begins
    // - 3 = anything after group 2 and before the data-udi attribute value begins
    // - 4 = the data-udi attribute value
    // - 5 = anything after group 4 until the image tag is closed
    const string replacement = "$1$3$4$5";

    return ResolveImgPattern.Replace(text, replacement);
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|