Memory improvements to ShortStringHelper (#13089)

(cherry picked from commit b17d9004fd)
This commit is contained in:
patrickdemooij9
2023-03-28 10:18:41 +02:00
committed by Sebastiaan Janssen
parent eee6207f19
commit 5f8ba2e864
4 changed files with 3766 additions and 25 deletions

View File

@@ -1,4 +1,4 @@
using System.Diagnostics; using System.Diagnostics;
using System.Globalization; using System.Globalization;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
using Umbraco.Cms.Core.Configuration.Models; using Umbraco.Cms.Core.Configuration.Models;
@@ -305,10 +305,10 @@ namespace Umbraco.Cms.Core.Strings
return text; return text;
} }
private static string RemoveSurrogatePairs(string text) private string RemoveSurrogatePairs(string text)
{ {
var input = text.ToCharArray(); var input = text.AsSpan();
var output = new char[input.Length]; Span<char> output = input.Length <= 1024 ? stackalloc char[input.Length] : new char[text.Length];
var opos = 0; var opos = 0;
for (var ipos = 0; ipos < input.Length; ipos++) for (var ipos = 0; ipos < input.Length; ipos++)
@@ -325,7 +325,7 @@ namespace Umbraco.Cms.Core.Strings
} }
} }
return new string(output, 0, opos); return new string(output);
} }
// here was a subtle, ascii-optimized version of the cleaning code, and I was // here was a subtle, ascii-optimized version of the cleaning code, and I was
@@ -347,7 +347,8 @@ namespace Umbraco.Cms.Core.Strings
// it's faster to use an array than a StringBuilder // it's faster to use an array than a StringBuilder
var ilen = input.Length; var ilen = input.Length;
var output = new char[ilen * 2]; // twice the length should be OK in all cases var totalSize = ilen * 2;
Span<char> output = totalSize <= 1024 ? stackalloc char[totalSize] : new char[totalSize]; // twice the length should be OK in all cases
for (var i = 0; i < ilen; i++) for (var i = 0; i < ilen; i++)
{ {
@@ -479,11 +480,11 @@ namespace Umbraco.Cms.Core.Strings
throw new Exception("Invalid state."); throw new Exception("Invalid state.");
} }
return new string(output, 0, opos); return new string(output.Slice(0, opos));
} }
// note: supports surrogate pairs in input string // note: supports surrogate pairs in input string
internal void CopyTerm(string input, int ipos, char[] output, ref int opos, int len, CleanStringType caseType, string culture, bool isAcronym) internal void CopyTerm(string input, int ipos, Span<char> output, ref int opos, int len, CleanStringType caseType, string culture, bool isAcronym)
{ {
var term = input.Substring(ipos, len); var term = input.Substring(ipos, len);
CultureInfo cultureInfo = string.IsNullOrEmpty(culture) ? CultureInfo.InvariantCulture : CultureInfo.GetCultureInfo(culture); CultureInfo cultureInfo = string.IsNullOrEmpty(culture) ? CultureInfo.InvariantCulture : CultureInfo.GetCultureInfo(culture);
@@ -509,19 +510,19 @@ namespace Umbraco.Cms.Core.Strings
//case CleanStringType.LowerCase: //case CleanStringType.LowerCase:
//case CleanStringType.UpperCase: //case CleanStringType.UpperCase:
case CleanStringType.Unchanged: case CleanStringType.Unchanged:
term.CopyTo(0, output, opos, len); term.CopyTo(output.Slice(opos, len));
opos += len; opos += len;
break; break;
case CleanStringType.LowerCase: case CleanStringType.LowerCase:
term = term.ToLower(cultureInfo); term = term.ToLower(cultureInfo);
term.CopyTo(0, output, opos, term.Length); term.CopyTo(output.Slice(opos, term.Length));
opos += term.Length; opos += term.Length;
break; break;
case CleanStringType.UpperCase: case CleanStringType.UpperCase:
term = term.ToUpper(cultureInfo); term = term.ToUpper(cultureInfo);
term.CopyTo(0, output, opos, term.Length); term.CopyTo(output.Slice(opos, term.Length));
opos += term.Length; opos += term.Length;
break; break;
@@ -532,7 +533,7 @@ namespace Umbraco.Cms.Core.Strings
{ {
s = term.Substring(ipos, 2); s = term.Substring(ipos, 2);
s = opos == 0 ? s.ToLower(cultureInfo) : s.ToUpper(cultureInfo); s = opos == 0 ? s.ToLower(cultureInfo) : s.ToUpper(cultureInfo);
s.CopyTo(0, output, opos, s.Length); s.CopyTo(output.Slice(opos, s.Length));
opos += s.Length; opos += s.Length;
i++; // surrogate pair len is 2 i++; // surrogate pair len is 2
} }
@@ -543,7 +544,7 @@ namespace Umbraco.Cms.Core.Strings
if (len > i) if (len > i)
{ {
term = term.Substring(i).ToLower(cultureInfo); term = term.Substring(i).ToLower(cultureInfo);
term.CopyTo(0, output, opos, term.Length); term.CopyTo(output.Slice(opos, term.Length));
opos += term.Length; opos += term.Length;
} }
break; break;
@@ -555,7 +556,7 @@ namespace Umbraco.Cms.Core.Strings
{ {
s = term.Substring(ipos, 2); s = term.Substring(ipos, 2);
s = s.ToUpper(cultureInfo); s = s.ToUpper(cultureInfo);
s.CopyTo(0, output, opos, s.Length); s.CopyTo(output.Slice(opos, s.Length));
opos += s.Length; opos += s.Length;
i++; // surrogate pair len is 2 i++; // surrogate pair len is 2
} }
@@ -566,7 +567,7 @@ namespace Umbraco.Cms.Core.Strings
if (len > i) if (len > i)
{ {
term = term.Substring(i).ToLower(cultureInfo); term = term.Substring(i).ToLower(cultureInfo);
term.CopyTo(0, output, opos, term.Length); term.CopyTo(output.Slice(opos, term.Length));
opos += term.Length; opos += term.Length;
} }
break; break;
@@ -578,7 +579,7 @@ namespace Umbraco.Cms.Core.Strings
{ {
s = term.Substring(ipos, 2); s = term.Substring(ipos, 2);
s = opos == 0 ? s : s.ToUpper(cultureInfo); s = opos == 0 ? s : s.ToUpper(cultureInfo);
s.CopyTo(0, output, opos, s.Length); s.CopyTo(output.Slice(opos, s.Length));
opos += s.Length; opos += s.Length;
i++; // surrogate pair len is 2 i++; // surrogate pair len is 2
} }
@@ -589,7 +590,7 @@ namespace Umbraco.Cms.Core.Strings
if (len > i) if (len > i)
{ {
term = term.Substring(i); term = term.Substring(i);
term.CopyTo(0, output, opos, term.Length); term.CopyTo(output.Slice(opos, term.Length));
opos += term.Length; opos += term.Length;
} }
break; break;

View File

@@ -11,21 +11,27 @@ namespace Umbraco.Cms.Core.Strings;
/// </remarks> /// </remarks>
public static class Utf8ToAsciiConverter public static class Utf8ToAsciiConverter
{ {
[Obsolete("Use ToAsciiString(ReadOnlySpan<char>..) instead")]
public static string ToAsciiString(string text, char fail = '?')
{
return ToAsciiString(text.AsSpan(), fail);
}
/// <summary> /// <summary>
/// Converts an Utf8 string into an Ascii string. /// Converts an Utf8 string into an Ascii string.
/// </summary> /// </summary>
/// <param name="text">The text to convert.</param> /// <param name="text">The text to convert.</param>
/// <param name="fail">The character to use to replace characters that cannot properly be converted.</param> /// <param name="fail">The character to use to replace characters that cannot properly be converted.</param>
/// <returns>The converted text.</returns> /// <returns>The converted text.</returns>
public static string ToAsciiString(string text, char fail = '?') public static string ToAsciiString(ReadOnlySpan<char> text, char fail = '?')
{ {
var input = text.ToCharArray();
// this is faster although it uses more memory // this is faster although it uses more memory
// but... we should be filtering short strings only... // but... we should be filtering short strings only...
var output = new char[input.Length * 3]; // *3 because of things such as OE
var len = ToAscii(input, output, fail); var totalSize = text.Length * 3;
return new string(output, 0, len); Span<char> output = totalSize <= 1024 ? stackalloc char[totalSize] : new char[totalSize]; // *3 because of things such as OE
var len = ToAscii(text, output, fail);
return new string(output[..len]);
// var output = new StringBuilder(input.Length + 16); // default is 16, start with at least input length + little extra // var output = new StringBuilder(input.Length + 16); // default is 16, start with at least input length + little extra
// ToAscii(input, output); // ToAscii(input, output);
@@ -66,7 +72,7 @@ public static class Utf8ToAsciiConverter
/// <returns>The number of characters in the output array.</returns> /// <returns>The number of characters in the output array.</returns>
/// <remarks>The caller must ensure that the output array is big enough.</remarks> /// <remarks>The caller must ensure that the output array is big enough.</remarks>
/// <exception cref="OverflowException">The output array is not big enough.</exception> /// <exception cref="OverflowException">The output array is not big enough.</exception>
private static int ToAscii(char[] input, char[] output, char fail = '?') private static int ToAscii(ReadOnlySpan<char> input, Span<char> output, char fail = '?')
{ {
var opos = 0; var opos = 0;
@@ -121,7 +127,7 @@ public static class Utf8ToAsciiConverter
/// <para>Input should contain Utf8 characters exclusively and NOT Unicode.</para> /// <para>Input should contain Utf8 characters exclusively and NOT Unicode.</para>
/// <para>Removes controls, normalizes whitespaces, replaces symbols by '?'.</para> /// <para>Removes controls, normalizes whitespaces, replaces symbols by '?'.</para>
/// </remarks> /// </remarks>
private static void ToAscii(char[] input, int ipos, char[] output, ref int opos, char fail = '?') private static void ToAscii(ReadOnlySpan<char> input, int ipos, Span<char> output, ref int opos, char fail = '?')
{ {
var c = input[ipos]; var c = input[ipos];

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,108 @@
using System;
using BenchmarkDotNet.Attributes;
using Umbraco.Cms.Core.Strings;
using Umbraco.Tests.Benchmarks.Config;
namespace Umbraco.Tests.Benchmarks;
/// <summary>
/// Benchmarks for <see cref="DefaultShortStringHelper"/> URL-segment cleaning.
/// </summary>
/// <remarks>
/// The commented-out benchmarks below are the variants that were used to compare
/// the array-based and span-based implementations; the tables at the end of the
/// class hold the results that were recorded for them.
/// </remarks>
[QuickRunWithMemoryDiagnoserConfig]
public class ShortStringHelperBenchmarks
{
    private DefaultShortStringHelper _shortStringHelper;
    private string _input;

    /// <summary>
    /// Creates the helper under test and the shared input string.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _shortStringHelper = new DefaultShortStringHelper(new DefaultShortStringHelperConfig());

        // The balloon emoji lies outside the Basic Multilingual Plane, so this
        // input contains exactly one surrogate pair.
        _input = "This is a 🎈 balloon";
    }

    /// <summary>
    /// Measures cleaning the shared input into a URL segment.
    /// </summary>
    /// <returns>
    /// The cleaned segment. It is returned (rather than discarded) so that the
    /// measured call has an observable result and cannot be optimized away.
    /// </returns>
    [Benchmark(Baseline = true)]
    public string ToUrlSegment()
    {
        return _shortStringHelper.CleanStringForUrlSegment(_input);
    }

    /*[Benchmark(Baseline = true)]
    public string OldAsciString()
    {
        return OldUtf8ToAsciiConverter.ToAsciiString(_input);
    }

    [Benchmark]
    public string NewAsciString()
    {
        return Utf8ToAsciiConverter.ToAsciiString(_input);
    }*/

    #region SurrogatePairs

    /*[Benchmark(Baseline = true)]
    public string RemoveSurrogatePairs()
    {
        var input = _input.ToCharArray();
        var output = new char[input.Length];
        var opos = 0;

        for (var ipos = 0; ipos < input.Length; ipos++)
        {
            var c = input[ipos];
            if (char.IsSurrogate(c)) // ignore high surrogate
            {
                ipos++; // and skip low surrogate
                output[opos++] = '?';
            }
            else
            {
                output[opos++] = c;
            }
        }

        return new string(output, 0, opos);
    }

    [Benchmark]
    public string RemoveNewSurrogatePairs()
    {
        var input = _input.AsSpan();
        Span<char> output = input.Length <= 1024 ? stackalloc char[input.Length] : new char[input.Length];
        var opos = 0;

        for (var ipos = 0; ipos < input.Length; ipos++)
        {
            var c = input[ipos];
            if (char.IsSurrogate(c)) // ignore high surrogate
            {
                ipos++; // and skip low surrogate
                output[opos++] = '?';
            }
            else
            {
                output[opos++] = c;
            }
        }

        // NOTE(review): a surrogate pair consumes two input chars but writes a
        // single '?', so opos can be smaller than output.Length. Unlike the old
        // variant above, this copies the whole buffer including the unused
        // tail; slicing to opos first would make both variants equivalent.
        return new string(output);
    }*/

    #endregion

    // Recorded results for the benchmarks above.

    //| Method                               | Mean     | Error    | StdDev  | Ratio | Gen 0  | Allocated |
    //|-------------------------------------:|---------:|---------:|--------:|------:|-------:|----------:|
    //| ToUrlSegment                         | 464.2 ns | 34.88 ns | 1.91 ns |  1.00 | 0.1627 |     512 B |
    //| ToUrlSegment (With below changes)    | 455.7 ns | 26.83 ns | 1.47 ns |  1.00 | 0.1182 |     384 B |
    //| ToUrlSegment (CleanCodeString change)| 420.6 ns | 64.06 ns | 3.51 ns |  1.00 | 0.0856 |     280 B |

    //| Method                  | Mean     | Error     | StdDev   | Ratio | Gen 0  | Allocated |
    //|------------------------ |---------:|----------:|---------:|------:|-------:|----------:|
    //| RemoveSurrogatePairs    | 70.75 ns | 15.307 ns | 0.839 ns |  1.00 | 0.0610 |     192 B |
    //| RemoveNewSurrogatePairs | 58.44 ns |  7.297 ns | 0.400 ns |  0.83 | 0.0198 |      64 B |

    //| Method        | Mean     | Error    | StdDev  | Ratio | Gen 0  | Allocated |
    //|-------------- |---------:|---------:|--------:|------:|-------:|----------:|
    //| OldAsciString | 181.4 ns | 11.50 ns | 0.63 ns |  1.00 | 0.0851 |     272 B |
    //| NewAsciString | 180.7 ns |  5.35 ns | 0.29 ns |  1.00 | 0.0450 |      64 B |
}