Memory improvements to ShortStringHelper (#13089)
(cherry picked from commit b17d9004fd)
This commit is contained in:
committed by
Sebastiaan Janssen
parent
eee6207f19
commit
5f8ba2e864
@@ -1,4 +1,4 @@
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Umbraco.Cms.Core.Configuration.Models;
|
||||
@@ -305,10 +305,10 @@ namespace Umbraco.Cms.Core.Strings
|
||||
return text;
|
||||
}
|
||||
|
||||
private static string RemoveSurrogatePairs(string text)
|
||||
private string RemoveSurrogatePairs(string text)
|
||||
{
|
||||
var input = text.ToCharArray();
|
||||
var output = new char[input.Length];
|
||||
var input = text.AsSpan();
|
||||
Span<char> output = input.Length <= 1024 ? stackalloc char[input.Length] : new char[text.Length];
|
||||
var opos = 0;
|
||||
|
||||
for (var ipos = 0; ipos < input.Length; ipos++)
|
||||
@@ -325,7 +325,7 @@ namespace Umbraco.Cms.Core.Strings
|
||||
}
|
||||
}
|
||||
|
||||
return new string(output, 0, opos);
|
||||
// Copy only the opos characters that were actually written. The buffer is
// sized to the input length, so building the string from the whole span
// would also copy any unwritten tail into the result.
return new string(output[..opos]);
|
||||
}
|
||||
|
||||
// here was a subtle, ascii-optimized version of the cleaning code, and I was
|
||||
@@ -347,7 +347,8 @@ namespace Umbraco.Cms.Core.Strings
|
||||
|
||||
// it's faster to use an array than a StringBuilder
|
||||
var ilen = input.Length;
|
||||
var output = new char[ilen * 2]; // twice the length should be OK in all cases
|
||||
var totalSize = ilen * 2;
|
||||
Span<char> output = totalSize <= 1024 ? stackalloc char[totalSize] : new char[totalSize]; // twice the length should be OK in all cases
|
||||
|
||||
for (var i = 0; i < ilen; i++)
|
||||
{
|
||||
@@ -479,11 +480,11 @@ namespace Umbraco.Cms.Core.Strings
|
||||
throw new Exception("Invalid state.");
|
||||
}
|
||||
|
||||
return new string(output, 0, opos);
|
||||
return new string(output.Slice(0, opos));
|
||||
}
|
||||
|
||||
// note: supports surrogate pairs in input string
|
||||
internal void CopyTerm(string input, int ipos, char[] output, ref int opos, int len, CleanStringType caseType, string culture, bool isAcronym)
|
||||
internal void CopyTerm(string input, int ipos, Span<char> output, ref int opos, int len, CleanStringType caseType, string culture, bool isAcronym)
|
||||
{
|
||||
var term = input.Substring(ipos, len);
|
||||
CultureInfo cultureInfo = string.IsNullOrEmpty(culture) ? CultureInfo.InvariantCulture : CultureInfo.GetCultureInfo(culture);
|
||||
@@ -509,19 +510,19 @@ namespace Umbraco.Cms.Core.Strings
|
||||
//case CleanStringType.LowerCase:
|
||||
//case CleanStringType.UpperCase:
|
||||
case CleanStringType.Unchanged:
|
||||
term.CopyTo(0, output, opos, len);
|
||||
term.CopyTo(output.Slice(opos, len));
|
||||
opos += len;
|
||||
break;
|
||||
|
||||
case CleanStringType.LowerCase:
|
||||
term = term.ToLower(cultureInfo);
|
||||
term.CopyTo(0, output, opos, term.Length);
|
||||
term.CopyTo(output.Slice(opos, term.Length));
|
||||
opos += term.Length;
|
||||
break;
|
||||
|
||||
case CleanStringType.UpperCase:
|
||||
term = term.ToUpper(cultureInfo);
|
||||
term.CopyTo(0, output, opos, term.Length);
|
||||
term.CopyTo(output.Slice(opos, term.Length));
|
||||
opos += term.Length;
|
||||
break;
|
||||
|
||||
@@ -532,7 +533,7 @@ namespace Umbraco.Cms.Core.Strings
|
||||
{
|
||||
s = term.Substring(ipos, 2);
|
||||
s = opos == 0 ? s.ToLower(cultureInfo) : s.ToUpper(cultureInfo);
|
||||
s.CopyTo(0, output, opos, s.Length);
|
||||
s.CopyTo(output.Slice(opos, s.Length));
|
||||
opos += s.Length;
|
||||
i++; // surrogate pair len is 2
|
||||
}
|
||||
@@ -543,7 +544,7 @@ namespace Umbraco.Cms.Core.Strings
|
||||
if (len > i)
|
||||
{
|
||||
term = term.Substring(i).ToLower(cultureInfo);
|
||||
term.CopyTo(0, output, opos, term.Length);
|
||||
term.CopyTo(output.Slice(opos, term.Length));
|
||||
opos += term.Length;
|
||||
}
|
||||
break;
|
||||
@@ -555,7 +556,7 @@ namespace Umbraco.Cms.Core.Strings
|
||||
{
|
||||
s = term.Substring(ipos, 2);
|
||||
s = s.ToUpper(cultureInfo);
|
||||
s.CopyTo(0, output, opos, s.Length);
|
||||
s.CopyTo(output.Slice(opos, s.Length));
|
||||
opos += s.Length;
|
||||
i++; // surrogate pair len is 2
|
||||
}
|
||||
@@ -566,7 +567,7 @@ namespace Umbraco.Cms.Core.Strings
|
||||
if (len > i)
|
||||
{
|
||||
term = term.Substring(i).ToLower(cultureInfo);
|
||||
term.CopyTo(0, output, opos, term.Length);
|
||||
term.CopyTo(output.Slice(opos, term.Length));
|
||||
opos += term.Length;
|
||||
}
|
||||
break;
|
||||
@@ -578,7 +579,7 @@ namespace Umbraco.Cms.Core.Strings
|
||||
{
|
||||
s = term.Substring(ipos, 2);
|
||||
s = opos == 0 ? s : s.ToUpper(cultureInfo);
|
||||
s.CopyTo(0, output, opos, s.Length);
|
||||
s.CopyTo(output.Slice(opos, s.Length));
|
||||
opos += s.Length;
|
||||
i++; // surrogate pair len is 2
|
||||
}
|
||||
@@ -589,7 +590,7 @@ namespace Umbraco.Cms.Core.Strings
|
||||
if (len > i)
|
||||
{
|
||||
term = term.Substring(i);
|
||||
term.CopyTo(0, output, opos, term.Length);
|
||||
term.CopyTo(output.Slice(opos, term.Length));
|
||||
opos += term.Length;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -11,21 +11,27 @@ namespace Umbraco.Cms.Core.Strings;
|
||||
/// </remarks>
|
||||
public static class Utf8ToAsciiConverter
|
||||
{
|
||||
/// <summary>
/// String-based overload kept for existing callers; forwards to the
/// <see cref="ReadOnlySpan{T}"/>-based overload.
/// </summary>
/// <param name="text">The text to convert.</param>
/// <param name="fail">The character to use to replace characters that cannot properly be converted.</param>
/// <returns>The converted text.</returns>
[Obsolete("Use ToAsciiString(ReadOnlySpan<char>..) instead")]
public static string ToAsciiString(string text, char fail = '?')
    => ToAsciiString(text.AsSpan(), fail);
|
||||
|
||||
/// <summary>
|
||||
/// Converts a Utf8 string into an Ascii string.
|
||||
/// </summary>
|
||||
/// <param name="text">The text to convert.</param>
|
||||
/// <param name="fail">The character to use to replace characters that cannot properly be converted.</param>
|
||||
/// <returns>The converted text.</returns>
|
||||
public static string ToAsciiString(string text, char fail = '?')
|
||||
public static string ToAsciiString(ReadOnlySpan<char> text, char fail = '?')
|
||||
{
|
||||
var input = text.ToCharArray();
|
||||
|
||||
// this is faster although it uses more memory
|
||||
// but... we should be filtering short strings only...
|
||||
var output = new char[input.Length * 3]; // *3 because of things such as OE
|
||||
var len = ToAscii(input, output, fail);
|
||||
return new string(output, 0, len);
|
||||
|
||||
var totalSize = text.Length * 3;
|
||||
Span<char> output = totalSize <= 1024 ? stackalloc char[totalSize] : new char[totalSize]; // *3 because of things such as OE
|
||||
var len = ToAscii(text, output, fail);
|
||||
return new string(output[..len]);
|
||||
|
||||
// var output = new StringBuilder(input.Length + 16); // default is 16, start with at least input length + little extra
|
||||
// ToAscii(input, output);
|
||||
@@ -66,7 +72,7 @@ public static class Utf8ToAsciiConverter
|
||||
/// <returns>The number of characters in the output array.</returns>
|
||||
/// <remarks>The caller must ensure that the output array is big enough.</remarks>
|
||||
/// <exception cref="OverflowException">The output array is not big enough.</exception>
|
||||
private static int ToAscii(char[] input, char[] output, char fail = '?')
|
||||
private static int ToAscii(ReadOnlySpan<char> input, Span<char> output, char fail = '?')
|
||||
{
|
||||
var opos = 0;
|
||||
|
||||
@@ -121,7 +127,7 @@ public static class Utf8ToAsciiConverter
|
||||
/// <para>Input should contain Utf8 characters exclusively and NOT Unicode.</para>
|
||||
/// <para>Removes controls, normalizes whitespaces, replaces symbols by '?'.</para>
|
||||
/// </remarks>
|
||||
private static void ToAscii(char[] input, int ipos, char[] output, ref int opos, char fail = '?')
|
||||
private static void ToAscii(ReadOnlySpan<char> input, int ipos, Span<char> output, ref int opos, char fail = '?')
|
||||
{
|
||||
var c = input[ipos];
|
||||
|
||||
|
||||
3626
tests/Umbraco.Tests.Benchmarks/OldUtf8ToAsciiConverter.cs
Normal file
3626
tests/Umbraco.Tests.Benchmarks/OldUtf8ToAsciiConverter.cs
Normal file
File diff suppressed because it is too large
Load Diff
108
tests/Umbraco.Tests.Benchmarks/ShortStringHelperBenchmarks.cs
Normal file
108
tests/Umbraco.Tests.Benchmarks/ShortStringHelperBenchmarks.cs
Normal file
@@ -0,0 +1,108 @@
|
||||
using System;
|
||||
using BenchmarkDotNet.Attributes;
|
||||
using Umbraco.Cms.Core.Strings;
|
||||
using Umbraco.Tests.Benchmarks.Config;
|
||||
|
||||
namespace Umbraco.Tests.Benchmarks;
|
||||
|
||||
/// <summary>
/// BenchmarkDotNet benchmarks for <see cref="DefaultShortStringHelper"/>.
/// The only active benchmark is <see cref="ToUrlSegment"/>; the other
/// benchmarks are kept commented out next to the result tables they produced.
/// </summary>
[QuickRunWithMemoryDiagnoserConfig]
public class ShortStringHelperBenchmarks
{
    // Both fields are assigned in Setup() before any benchmark runs.
    private DefaultShortStringHelper _shortStringHelper;

    private string _input;

    /// <summary>
    /// Creates the helper under test and the shared input string.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        // The emoji lies outside the Basic Multilingual Plane, so the input
        // contains a surrogate pair.
        _input = "This is a 🎈 balloon";
        _shortStringHelper = new(new DefaultShortStringHelperConfig());
    }

    /// <summary>
    /// Runs <c>CleanStringForUrlSegment</c> on the shared input; the result is discarded.
    /// </summary>
    [Benchmark(Baseline = true)]
    public void ToUrlSegment() => _shortStringHelper.CleanStringForUrlSegment(_input);

    /*[Benchmark(Baseline = true)]
    public string OldAsciString()
    {
        return OldUtf8ToAsciiConverter.ToAsciiString(_input);
    }


    [Benchmark]
    public string NewAsciString()
    {
        return Utf8ToAsciiConverter.ToAsciiString(_input);
    }*/

    #region SurrogatePairs

    /*[Benchmark(Baseline = true)]
    public string RemoveSurrogatePairs()
    {
        var input = _input.ToCharArray();
        var output = new char[input.Length];
        var opos = 0;

        for (var ipos = 0; ipos < input.Length; ipos++)
        {
            var c = input[ipos];
            if (char.IsSurrogate(c)) // ignore high surrogate
            {
                ipos++; // and skip low surrogate
                output[opos++] = '?';
            }
            else
            {
                output[opos++] = c;
            }
        }

        return new string(output, 0, opos);
    }

    [Benchmark]
    public string RemoveNewSurrogatePairs()
    {
        var input = _input.AsSpan();
        Span<char> output = input.Length <= 1024 ? stackalloc char[input.Length] : new char[input.Length];
        var opos = 0;

        for (var ipos = 0; ipos < input.Length; ipos++)
        {
            var c = input[ipos];
            if (char.IsSurrogate(c)) // ignore high surrogate
            {
                ipos++; // and skip low surrogate
                output[opos++] = '?';
            }
            else
            {
                output[opos++] = c;
            }
        }

        return new string(output);
    }*/

    #endregion

    // Recorded results, one table per benchmark group above.

    //|                             Method |     Mean |    Error |  StdDev | Ratio |  Gen 0 | Allocated |
    //|-----------------------------------:|---------:|---------:|--------:|------:|-------:|----------:|
    //|                       ToUrlSegment | 464.2 ns | 34.88 ns | 1.91 ns |  1.00 | 0.1627 |     512 B |
    //|  ToUrlSegment (With below changes) | 455.7 ns | 26.83 ns | 1.47 ns |  1.00 | 0.1182 |     384 B |
    //| ToUrlSegment(CleanCodeString change| 420.6 ns | 64.06 ns | 3.51 ns |  1.00 | 0.0856 |     280 B |

    //|                  Method |     Mean |     Error |   StdDev | Ratio |  Gen 0 | Allocated |
    //|------------------------ |---------:|----------:|---------:|------:|-------:|----------:|
    //|    RemoveSurrogatePairs | 70.75 ns | 15.307 ns | 0.839 ns |  1.00 | 0.0610 |     192 B |
    //| RemoveNewSurrogatePairs | 58.44 ns |  7.297 ns | 0.400 ns |  0.83 | 0.0198 |      64 B |

    //|        Method |     Mean |    Error |  StdDev | Ratio |  Gen 0 | Allocated |
    //|-------------- |---------:|---------:|--------:|------:|-------:|----------:|
    //| OldAsciString | 181.4 ns | 11.50 ns | 0.63 ns |  1.00 | 0.0851 |     272 B |
    //| NewAsciString | 180.7 ns |  5.35 ns | 0.29 ns |  1.00 | 0.0450 |      64 B |
}
|
||||
Reference in New Issue
Block a user