Memory improvements to ShortStringHelper (#13089)
(cherry picked from commit b17d9004fd)
This commit is contained in:
committed by
Sebastiaan Janssen
parent
eee6207f19
commit
5f8ba2e864
@@ -1,4 +1,4 @@
|
|||||||
using System.Diagnostics;
|
using System.Diagnostics;
|
||||||
using System.Globalization;
|
using System.Globalization;
|
||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
using Umbraco.Cms.Core.Configuration.Models;
|
using Umbraco.Cms.Core.Configuration.Models;
|
||||||
@@ -305,10 +305,10 @@ namespace Umbraco.Cms.Core.Strings
|
|||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static string RemoveSurrogatePairs(string text)
|
private string RemoveSurrogatePairs(string text)
|
||||||
{
|
{
|
||||||
var input = text.ToCharArray();
|
var input = text.AsSpan();
|
||||||
var output = new char[input.Length];
|
Span<char> output = input.Length <= 1024 ? stackalloc char[input.Length] : new char[text.Length];
|
||||||
var opos = 0;
|
var opos = 0;
|
||||||
|
|
||||||
for (var ipos = 0; ipos < input.Length; ipos++)
|
for (var ipos = 0; ipos < input.Length; ipos++)
|
||||||
@@ -325,7 +325,7 @@ namespace Umbraco.Cms.Core.Strings
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new string(output, 0, opos);
|
// Only the first opos chars of the scratch buffer were written (the pre-change code used new string(output, 0, opos)); slice so the unwritten tail is not copied into the result.
return new string(output.Slice(0, opos));
|
||||||
}
|
}
|
||||||
|
|
||||||
// here was a subtle, ascii-optimized version of the cleaning code, and I was
|
// here was a subtle, ascii-optimized version of the cleaning code, and I was
|
||||||
@@ -347,7 +347,8 @@ namespace Umbraco.Cms.Core.Strings
|
|||||||
|
|
||||||
// it's faster to use an array than a StringBuilder
|
// it's faster to use an array than a StringBuilder
|
||||||
var ilen = input.Length;
|
var ilen = input.Length;
|
||||||
var output = new char[ilen * 2]; // twice the length should be OK in all cases
|
var totalSize = ilen * 2;
|
||||||
|
Span<char> output = totalSize <= 1024 ? stackalloc char[totalSize] : new char[totalSize]; // twice the length should be OK in all cases
|
||||||
|
|
||||||
for (var i = 0; i < ilen; i++)
|
for (var i = 0; i < ilen; i++)
|
||||||
{
|
{
|
||||||
@@ -479,11 +480,11 @@ namespace Umbraco.Cms.Core.Strings
|
|||||||
throw new Exception("Invalid state.");
|
throw new Exception("Invalid state.");
|
||||||
}
|
}
|
||||||
|
|
||||||
return new string(output, 0, opos);
|
return new string(output.Slice(0, opos));
|
||||||
}
|
}
|
||||||
|
|
||||||
// note: supports surrogate pairs in input string
|
// note: supports surrogate pairs in input string
|
||||||
internal void CopyTerm(string input, int ipos, char[] output, ref int opos, int len, CleanStringType caseType, string culture, bool isAcronym)
|
internal void CopyTerm(string input, int ipos, Span<char> output, ref int opos, int len, CleanStringType caseType, string culture, bool isAcronym)
|
||||||
{
|
{
|
||||||
var term = input.Substring(ipos, len);
|
var term = input.Substring(ipos, len);
|
||||||
CultureInfo cultureInfo = string.IsNullOrEmpty(culture) ? CultureInfo.InvariantCulture : CultureInfo.GetCultureInfo(culture);
|
CultureInfo cultureInfo = string.IsNullOrEmpty(culture) ? CultureInfo.InvariantCulture : CultureInfo.GetCultureInfo(culture);
|
||||||
@@ -509,19 +510,19 @@ namespace Umbraco.Cms.Core.Strings
|
|||||||
//case CleanStringType.LowerCase:
|
//case CleanStringType.LowerCase:
|
||||||
//case CleanStringType.UpperCase:
|
//case CleanStringType.UpperCase:
|
||||||
case CleanStringType.Unchanged:
|
case CleanStringType.Unchanged:
|
||||||
term.CopyTo(0, output, opos, len);
|
term.CopyTo(output.Slice(opos, len));
|
||||||
opos += len;
|
opos += len;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CleanStringType.LowerCase:
|
case CleanStringType.LowerCase:
|
||||||
term = term.ToLower(cultureInfo);
|
term = term.ToLower(cultureInfo);
|
||||||
term.CopyTo(0, output, opos, term.Length);
|
term.CopyTo(output.Slice(opos, term.Length));
|
||||||
opos += term.Length;
|
opos += term.Length;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CleanStringType.UpperCase:
|
case CleanStringType.UpperCase:
|
||||||
term = term.ToUpper(cultureInfo);
|
term = term.ToUpper(cultureInfo);
|
||||||
term.CopyTo(0, output, opos, term.Length);
|
term.CopyTo(output.Slice(opos, term.Length));
|
||||||
opos += term.Length;
|
opos += term.Length;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -532,7 +533,7 @@ namespace Umbraco.Cms.Core.Strings
|
|||||||
{
|
{
|
||||||
s = term.Substring(ipos, 2);
|
s = term.Substring(ipos, 2);
|
||||||
s = opos == 0 ? s.ToLower(cultureInfo) : s.ToUpper(cultureInfo);
|
s = opos == 0 ? s.ToLower(cultureInfo) : s.ToUpper(cultureInfo);
|
||||||
s.CopyTo(0, output, opos, s.Length);
|
s.CopyTo(output.Slice(opos, s.Length));
|
||||||
opos += s.Length;
|
opos += s.Length;
|
||||||
i++; // surrogate pair len is 2
|
i++; // surrogate pair len is 2
|
||||||
}
|
}
|
||||||
@@ -543,7 +544,7 @@ namespace Umbraco.Cms.Core.Strings
|
|||||||
if (len > i)
|
if (len > i)
|
||||||
{
|
{
|
||||||
term = term.Substring(i).ToLower(cultureInfo);
|
term = term.Substring(i).ToLower(cultureInfo);
|
||||||
term.CopyTo(0, output, opos, term.Length);
|
term.CopyTo(output.Slice(opos, term.Length));
|
||||||
opos += term.Length;
|
opos += term.Length;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -555,7 +556,7 @@ namespace Umbraco.Cms.Core.Strings
|
|||||||
{
|
{
|
||||||
s = term.Substring(ipos, 2);
|
s = term.Substring(ipos, 2);
|
||||||
s = s.ToUpper(cultureInfo);
|
s = s.ToUpper(cultureInfo);
|
||||||
s.CopyTo(0, output, opos, s.Length);
|
s.CopyTo(output.Slice(opos, s.Length));
|
||||||
opos += s.Length;
|
opos += s.Length;
|
||||||
i++; // surrogate pair len is 2
|
i++; // surrogate pair len is 2
|
||||||
}
|
}
|
||||||
@@ -566,7 +567,7 @@ namespace Umbraco.Cms.Core.Strings
|
|||||||
if (len > i)
|
if (len > i)
|
||||||
{
|
{
|
||||||
term = term.Substring(i).ToLower(cultureInfo);
|
term = term.Substring(i).ToLower(cultureInfo);
|
||||||
term.CopyTo(0, output, opos, term.Length);
|
term.CopyTo(output.Slice(opos, term.Length));
|
||||||
opos += term.Length;
|
opos += term.Length;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -578,7 +579,7 @@ namespace Umbraco.Cms.Core.Strings
|
|||||||
{
|
{
|
||||||
s = term.Substring(ipos, 2);
|
s = term.Substring(ipos, 2);
|
||||||
s = opos == 0 ? s : s.ToUpper(cultureInfo);
|
s = opos == 0 ? s : s.ToUpper(cultureInfo);
|
||||||
s.CopyTo(0, output, opos, s.Length);
|
s.CopyTo(output.Slice(opos, s.Length));
|
||||||
opos += s.Length;
|
opos += s.Length;
|
||||||
i++; // surrogate pair len is 2
|
i++; // surrogate pair len is 2
|
||||||
}
|
}
|
||||||
@@ -589,7 +590,7 @@ namespace Umbraco.Cms.Core.Strings
|
|||||||
if (len > i)
|
if (len > i)
|
||||||
{
|
{
|
||||||
term = term.Substring(i);
|
term = term.Substring(i);
|
||||||
term.CopyTo(0, output, opos, term.Length);
|
term.CopyTo(output.Slice(opos, term.Length));
|
||||||
opos += term.Length;
|
opos += term.Length;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -11,21 +11,27 @@ namespace Umbraco.Cms.Core.Strings;
|
|||||||
/// </remarks>
|
/// </remarks>
|
||||||
public static class Utf8ToAsciiConverter
|
public static class Utf8ToAsciiConverter
|
||||||
{
|
{
|
||||||
|
[Obsolete("Use ToAsciiString(ReadOnlySpan<char>..) instead")]
|
||||||
|
public static string ToAsciiString(string text, char fail = '?')
|
||||||
|
{
|
||||||
|
return ToAsciiString(text.AsSpan(), fail);
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Converts an Utf8 string into an Ascii string.
|
/// Converts an Utf8 string into an Ascii string.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="text">The text to convert.</param>
|
/// <param name="text">The text to convert.</param>
|
||||||
/// <param name="fail">The character to use to replace characters that cannot properly be converted.</param>
|
/// <param name="fail">The character to use to replace characters that cannot properly be converted.</param>
|
||||||
/// <returns>The converted text.</returns>
|
/// <returns>The converted text.</returns>
|
||||||
public static string ToAsciiString(string text, char fail = '?')
|
public static string ToAsciiString(ReadOnlySpan<char> text, char fail = '?')
|
||||||
{
|
{
|
||||||
var input = text.ToCharArray();
|
|
||||||
|
|
||||||
// this is faster although it uses more memory
|
// this is faster although it uses more memory
|
||||||
// but... we should be filtering short strings only...
|
// but... we should be filtering short strings only...
|
||||||
var output = new char[input.Length * 3]; // *3 because of things such as OE
|
|
||||||
var len = ToAscii(input, output, fail);
|
var totalSize = text.Length * 3;
|
||||||
return new string(output, 0, len);
|
Span<char> output = totalSize <= 1024 ? stackalloc char[totalSize] : new char[totalSize]; // *3 because of things such as OE
|
||||||
|
var len = ToAscii(text, output, fail);
|
||||||
|
return new string(output[..len]);
|
||||||
|
|
||||||
// var output = new StringBuilder(input.Length + 16); // default is 16, start with at least input length + little extra
|
// var output = new StringBuilder(input.Length + 16); // default is 16, start with at least input length + little extra
|
||||||
// ToAscii(input, output);
|
// ToAscii(input, output);
|
||||||
@@ -66,7 +72,7 @@ public static class Utf8ToAsciiConverter
|
|||||||
/// <returns>The number of characters in the output array.</returns>
|
/// <returns>The number of characters in the output array.</returns>
|
||||||
/// <remarks>The caller must ensure that the output array is big enough.</remarks>
|
/// <remarks>The caller must ensure that the output array is big enough.</remarks>
|
||||||
/// <exception cref="OverflowException">The output array is not big enough.</exception>
|
/// <exception cref="OverflowException">The output array is not big enough.</exception>
|
||||||
private static int ToAscii(char[] input, char[] output, char fail = '?')
|
private static int ToAscii(ReadOnlySpan<char> input, Span<char> output, char fail = '?')
|
||||||
{
|
{
|
||||||
var opos = 0;
|
var opos = 0;
|
||||||
|
|
||||||
@@ -121,7 +127,7 @@ public static class Utf8ToAsciiConverter
|
|||||||
/// <para>Input should contain Utf8 characters exclusively and NOT Unicode.</para>
|
/// <para>Input should contain Utf8 characters exclusively and NOT Unicode.</para>
|
||||||
/// <para>Removes controls, normalizes whitespaces, replaces symbols by '?'.</para>
|
/// <para>Removes controls, normalizes whitespaces, replaces symbols by '?'.</para>
|
||||||
/// </remarks>
|
/// </remarks>
|
||||||
private static void ToAscii(char[] input, int ipos, char[] output, ref int opos, char fail = '?')
|
private static void ToAscii(ReadOnlySpan<char> input, int ipos, Span<char> output, ref int opos, char fail = '?')
|
||||||
{
|
{
|
||||||
var c = input[ipos];
|
var c = input[ipos];
|
||||||
|
|
||||||
|
|||||||
3626
tests/Umbraco.Tests.Benchmarks/OldUtf8ToAsciiConverter.cs
Normal file
3626
tests/Umbraco.Tests.Benchmarks/OldUtf8ToAsciiConverter.cs
Normal file
File diff suppressed because it is too large
Load Diff
108
tests/Umbraco.Tests.Benchmarks/ShortStringHelperBenchmarks.cs
Normal file
108
tests/Umbraco.Tests.Benchmarks/ShortStringHelperBenchmarks.cs
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
using System;
|
||||||
|
using BenchmarkDotNet.Attributes;
|
||||||
|
using Umbraco.Cms.Core.Strings;
|
||||||
|
using Umbraco.Tests.Benchmarks.Config;
|
||||||
|
|
||||||
|
namespace Umbraco.Tests.Benchmarks;
|
||||||
|
|
||||||
|
[QuickRunWithMemoryDiagnoserConfig]
|
||||||
|
public class ShortStringHelperBenchmarks
|
||||||
|
{
|
||||||
|
private DefaultShortStringHelper _shortStringHelper;
|
||||||
|
|
||||||
|
private string _input;
|
||||||
|
|
||||||
|
[GlobalSetup]
|
||||||
|
public void Setup()
|
||||||
|
{
|
||||||
|
_shortStringHelper = new DefaultShortStringHelper(new DefaultShortStringHelperConfig());
|
||||||
|
_input = "This is a 🎈 balloon";
|
||||||
|
}
|
||||||
|
|
||||||
|
[Benchmark(Baseline = true)]
|
||||||
|
public void ToUrlSegment()
|
||||||
|
{
|
||||||
|
_shortStringHelper.CleanStringForUrlSegment(_input);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*[Benchmark(Baseline = true)]
|
||||||
|
public string OldAsciString()
|
||||||
|
{
|
||||||
|
return OldUtf8ToAsciiConverter.ToAsciiString(_input);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
[Benchmark]
|
||||||
|
public string NewAsciString()
|
||||||
|
{
|
||||||
|
return Utf8ToAsciiConverter.ToAsciiString(_input);
|
||||||
|
}*/
|
||||||
|
|
||||||
|
#region SurrogatePairs
|
||||||
|
|
||||||
|
/*[Benchmark(Baseline = true)]
|
||||||
|
public string RemoveSurrogatePairs()
|
||||||
|
{
|
||||||
|
var input = _input.ToCharArray();
|
||||||
|
var output = new char[input.Length];
|
||||||
|
var opos = 0;
|
||||||
|
|
||||||
|
for (var ipos = 0; ipos < input.Length; ipos++)
|
||||||
|
{
|
||||||
|
var c = input[ipos];
|
||||||
|
if (char.IsSurrogate(c)) // ignore high surrogate
|
||||||
|
{
|
||||||
|
ipos++; // and skip low surrogate
|
||||||
|
output[opos++] = '?';
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
output[opos++] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new string(output, 0, opos);
|
||||||
|
}
|
||||||
|
|
||||||
|
[Benchmark]
|
||||||
|
public string RemoveNewSurrogatePairs()
|
||||||
|
{
|
||||||
|
var input = _input.AsSpan();
|
||||||
|
Span<char> output = input.Length <= 1024 ? stackalloc char[input.Length] : new char[input.Length];
|
||||||
|
var opos = 0;
|
||||||
|
|
||||||
|
for (var ipos = 0; ipos < input.Length; ipos++)
|
||||||
|
{
|
||||||
|
var c = input[ipos];
|
||||||
|
if (char.IsSurrogate(c)) // ignore high surrogate
|
||||||
|
{
|
||||||
|
ipos++; // and skip low surrogate
|
||||||
|
output[opos++] = '?';
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
output[opos++] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new string(output);
|
||||||
|
}*/
|
||||||
|
|
||||||
|
#endregion
|
||||||
|
|
||||||
|
//| Method | Mean | Error | StdDev | Ratio | Gen 0 | Allocated |
|
||||||
|
//|-----------------------------------:|---------:|---------:|--------:|------:|-------:|----------:|
|
||||||
|
//| ToUrlSegment | 464.2 ns | 34.88 ns | 1.91 ns | 1.00 | 0.1627 | 512 B |
|
||||||
|
//| ToUrlSegment (With below changes) | 455.7 ns | 26.83 ns | 1.47 ns | 1.00 | 0.1182 | 384 B |
|
||||||
|
//| ToUrlSegment(CleanCodeString change| 420.6 ns | 64.06 ns | 3.51 ns | 1.00 | 0.0856 | 280 B |
|
||||||
|
|
||||||
|
//| Method | Mean | Error | StdDev | Ratio | Gen 0 | Allocated |
|
||||||
|
//|------------------------ |---------:|----------:|---------:|------:|-------:|----------:|
|
||||||
|
//| RemoveSurrogatePairs | 70.75 ns | 15.307 ns | 0.839 ns | 1.00 | 0.0610 | 192 B |
|
||||||
|
//| RemoveNewSurrogatePairs | 58.44 ns | 7.297 ns | 0.400 ns | 0.83 | 0.0198 | 64 B |
|
||||||
|
|
||||||
|
//| Method | Mean | Error | StdDev | Ratio | Gen 0 | Allocated |
|
||||||
|
//|-------------- |---------:|---------:|--------:|------:|-------:|----------:|
|
||||||
|
//| OldAsciString | 181.4 ns | 11.50 ns | 0.63 ns | 1.00 | 0.0851 | 272 B |
|
||||||
|
//| NewAsciString | 180.7 ns | 5.35 ns | 0.29 ns | 1.00 | 0.0450 | 64 B |
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user