perf(strings): establish Utf8ToAsciiConverter baseline benchmarks
This commit is contained in:
44
docs/benchmarks/utf8-converter-baseline-2025-11-27.md
Normal file
44
docs/benchmarks/utf8-converter-baseline-2025-11-27.md
Normal file
@@ -0,0 +1,44 @@
|
||||
# Utf8ToAsciiConverter Baseline Benchmarks
|
||||
|
||||
**Date:** 2025-11-27
|
||||
**Implementation:** Original 3,631-line switch statement
|
||||
**Runtime:** .NET 10.0
|
||||
|
||||
## Results
|
||||
|
||||
```
|
||||
BenchmarkDotNet v0.15.6, Linux Ubuntu 25.10 (Questing Quokka)
|
||||
Intel Xeon CPU 2.80GHz, 1 CPU, 16 logical and 8 physical cores
|
||||
.NET SDK 10.0.100
|
||||
[Host] : .NET 10.0.0 (10.0.0, 10.0.25.52411), X64 RyuJIT x86-64-v4
|
||||
DefaultJob : .NET 10.0.0 (10.0.0, 10.0.25.52411), X64 RyuJIT x86-64-v4
|
||||
```
|
||||
|
||||
| Method | Mean | Error | StdDev | Rank | Gen0 | Gen1 | Gen2 | Allocated |
|
||||
|----------------------- |----------------:|--------------:|--------------:|-----:|---------:|---------:|---------:|----------:|
|
||||
| Tiny_Ascii | 82.81 ns | 0.402 ns | 0.314 ns | 2 | 0.0027 | - | - | 48 B |
|
||||
| Tiny_Mixed | 71.05 ns | 0.225 ns | 0.176 ns | 1 | 0.0027 | - | - | 48 B |
|
||||
| Small_Ascii | 695.75 ns | 4.394 ns | 3.669 ns | 3 | 0.0124 | - | - | 224 B |
|
||||
| Small_Mixed | 686.54 ns | 8.868 ns | 8.295 ns | 3 | 0.0124 | - | - | 224 B |
|
||||
| Medium_Ascii | 5,994.68 ns | 32.905 ns | 30.779 ns | 4 | 0.4730 | - | - | 8240 B |
|
||||
| Medium_Mixed | 7,116.65 ns | 27.489 ns | 22.955 ns | 5 | 0.4730 | - | - | 8264 B |
|
||||
| Large_Ascii | 593,733.29 ns | 2,040.378 ns | 1,703.808 ns | 7 | 249.0234 | 249.0234 | 249.0234 | 819332 B |
|
||||
| Large_Mixed | 1,066,297.43 ns | 8,507.650 ns | 7,958.061 ns | 8 | 248.0469 | 248.0469 | 248.0469 | 823523 B |
|
||||
| Large_WorstCase | 2,148,169.56 ns | 16,455.374 ns | 15,392.367 ns | 9 | 246.0938 | 246.0938 | 246.0938 | 1024125 B |
|
||||
| CharArray_Medium_Mixed | 7,357.24 ns | 59.719 ns | 55.861 ns | 6 | 0.5951 | 0.0076 | - | 10336 B |
|
||||
|
||||
## Notes
|
||||
|
||||
- Baseline before SIMD refactor
|
||||
- Used as comparison target for Task 7
|
||||
- Original implementation uses 3,631-line switch statement for character mappings
|
||||
- All benchmarks allocate a new result (a string, or a char[] for CharArray_Medium_Mixed) on every call
|
||||
- Large_WorstCase (Cyrillic text) is the slowest at ~2.15 ms for 102,400 characters (100 × 1024)
|
||||
|
||||
## Key Observations
|
||||
|
||||
1. **Pure ASCII performance**: 82.81 ns for 10 characters, ~594 µs for 102,400 characters (100 × 1024)
|
||||
2. **Mixed content performance**: 71.05 ns for 10 characters, ~1.07 ms for 102,400 characters (100 × 1024)
|
||||
3. **Worst case (Cyrillic)**: ~2.15 ms for 102,400 characters (about 2x slower than mixed)
|
||||
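4. **Memory allocation**: Grows linearly with input size for medium and large inputs, at about 8 bytes allocated per input character in total, including the output string (8,240 B for 1,024 characters; 819,332 B for 102,400)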
|
||||
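5. **GC pressure**: Significant on large inputs, where Gen0, Gen1 and Gen2 collections all occur at roughly 246–249 per 1,000 operations; the smaller inputs trigger only Gen0 collections (plus a small Gen1 figure for CharArray_Medium_Mixed)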
|
||||
63
tests/Umbraco.Tests.Benchmarks/BenchmarkTextGenerator.cs
Normal file
63
tests/Umbraco.Tests.Benchmarks/BenchmarkTextGenerator.cs
Normal file
@@ -0,0 +1,63 @@
|
||||
using System.Text;
|
||||
|
||||
namespace Umbraco.Tests.Benchmarks;
|
||||
|
||||
/// <summary>
/// Produces reproducible sample text for benchmarks.
/// </summary>
/// <remarks>
/// Every generator call creates its own <see cref="Random"/> from the same fixed seed,
/// so a given method called with a given length always returns the same string.
/// </remarks>
public static class BenchmarkTextGenerator
{
    // Seed handed to every Random instance created by this class.
    private const int Seed = 42;

    // Lower-case letters, upper-case letters and digits.
    private static readonly char[] AsciiAlphaNum =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".ToCharArray();

    // Space plus common ASCII punctuation.
    private static readonly char[] AsciiPunctuation =
        " .,;:!?-_'\"()".ToCharArray();

    // Accented and other non-ASCII Latin letters.
    private static readonly char[] LatinAccented =
        "àáâãäåæèéêëìíîïñòóôõöøùúûüýÿÀÁÂÃÄÅÆÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝŸœŒßðÐþÞ".ToCharArray();

    // Upper- and lower-case Cyrillic letters.
    private static readonly char[] Cyrillic =
        "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя".ToCharArray();

    // Assorted non-ASCII symbols.
    private static readonly char[] Symbols =
        "©®™€£¥°±×÷§¶†‡•".ToCharArray();

    // Subset of Cyrillic letters used for the worst-case input.
    private static readonly char[] WorstCaseCyrillic =
        "ЩЮЯЖЧШщюяжчш".ToCharArray();

    /// <summary>
    /// Generates a string of <paramref name="length"/> characters drawn from ASCII letters and digits.
    /// </summary>
    /// <param name="length">Number of characters to generate.</param>
    /// <returns>The generated string.</returns>
    public static string GeneratePureAscii(int length)
    {
        return GenerateFromCharset(length, AsciiAlphaNum);
    }

    /// <summary>
    /// Generates a string of <paramref name="length"/> characters mixing several character sets.
    /// </summary>
    /// <remarks>
    /// Each character's set is chosen by a roll in the range 0-99: 70 values select ASCII
    /// letters/digits, 15 ASCII punctuation, 10 accented Latin, 4 Cyrillic and 1 symbols.
    /// </remarks>
    /// <param name="length">Number of characters to generate.</param>
    /// <returns>The generated string.</returns>
    public static string GenerateMixed(int length)
    {
        var rng = new Random(Seed);
        var text = new StringBuilder(length);

        var produced = 0;
        while (produced < length)
        {
            // Two draws per character, always in this order: the set first, then the character within it.
            char[] pool = SelectMixedPool(rng.Next(100));
            text.Append(pool[rng.Next(pool.Length)]);
            produced++;
        }

        return text.ToString();
    }

    /// <summary>
    /// Generates a string of <paramref name="length"/> characters drawn from the worst-case Cyrillic set.
    /// </summary>
    /// <param name="length">Number of characters to generate.</param>
    /// <returns>The generated string.</returns>
    public static string GenerateWorstCase(int length)
    {
        return GenerateFromCharset(length, WorstCaseCyrillic);
    }

    // Maps a roll in the range 0-99 to the character set used by GenerateMixed.
    private static char[] SelectMixedPool(int roll)
    {
        if (roll < 70)
        {
            return AsciiAlphaNum;
        }

        if (roll < 85)
        {
            return AsciiPunctuation;
        }

        if (roll < 95)
        {
            return LatinAccented;
        }

        if (roll < 99)
        {
            return Cyrillic;
        }

        return Symbols;
    }

    // Builds a string of the requested length, drawing one character per position from a single set.
    private static string GenerateFromCharset(int length, char[] charset)
    {
        var rng = new Random(Seed);
        var text = new StringBuilder(length);

        for (var remaining = length; remaining > 0; remaining--)
        {
            text.Append(charset[rng.Next(charset.Length)]);
        }

        return text.ToString();
    }
}
|
||||
@@ -0,0 +1,52 @@
|
||||
using BenchmarkDotNet.Attributes;
|
||||
using BenchmarkDotNet.Columns;
|
||||
using BenchmarkDotNet.Jobs;
|
||||
using Umbraco.Cms.Core.Strings;
|
||||
|
||||
namespace Umbraco.Tests.Benchmarks;
|
||||
|
||||
/// <summary>
/// BenchmarkDotNet benchmarks that run <see cref="Utf8ToAsciiConverter"/> over generated
/// inputs of four sizes (10, 100, 1024 and 100 * 1024 characters).
/// </summary>
/// <remarks>
/// The inputs are generated once, in static field initializers, by
/// <see cref="BenchmarkTextGenerator"/>; the benchmark methods only perform the conversion.
/// </remarks>
[MemoryDiagnoser]
[RankColumn]
[StatisticalTestColumn]
public class Utf8ToAsciiConverterBaselineBenchmarks
{
    // Input sizes, in characters.
    private const int TinyLength = 10;
    private const int SmallLength = 100;
    private const int MediumLength = 1024;
    private const int LargeLength = 100 * 1024;

    // Pre-generated inputs shared by all benchmark invocations.
    private static readonly string TinyAscii = BenchmarkTextGenerator.GeneratePureAscii(TinyLength);
    private static readonly string TinyMixed = BenchmarkTextGenerator.GenerateMixed(TinyLength);
    private static readonly string SmallAscii = BenchmarkTextGenerator.GeneratePureAscii(SmallLength);
    private static readonly string SmallMixed = BenchmarkTextGenerator.GenerateMixed(SmallLength);
    private static readonly string MediumAscii = BenchmarkTextGenerator.GeneratePureAscii(MediumLength);
    private static readonly string MediumMixed = BenchmarkTextGenerator.GenerateMixed(MediumLength);
    private static readonly string LargeAscii = BenchmarkTextGenerator.GeneratePureAscii(LargeLength);
    private static readonly string LargeMixed = BenchmarkTextGenerator.GenerateMixed(LargeLength);
    private static readonly string LargeWorstCase = BenchmarkTextGenerator.GenerateWorstCase(LargeLength);

    /// <summary>Converts the 10-character pure-ASCII input to a string.</summary>
    [Benchmark]
    public string Tiny_Ascii()
    {
        return Utf8ToAsciiConverter.ToAsciiString(TinyAscii);
    }

    /// <summary>Converts the 10-character mixed input to a string.</summary>
    [Benchmark]
    public string Tiny_Mixed()
    {
        return Utf8ToAsciiConverter.ToAsciiString(TinyMixed);
    }

    /// <summary>Converts the 100-character pure-ASCII input to a string.</summary>
    [Benchmark]
    public string Small_Ascii()
    {
        return Utf8ToAsciiConverter.ToAsciiString(SmallAscii);
    }

    /// <summary>Converts the 100-character mixed input to a string.</summary>
    [Benchmark]
    public string Small_Mixed()
    {
        return Utf8ToAsciiConverter.ToAsciiString(SmallMixed);
    }

    /// <summary>Converts the 1024-character pure-ASCII input to a string.</summary>
    [Benchmark]
    public string Medium_Ascii()
    {
        return Utf8ToAsciiConverter.ToAsciiString(MediumAscii);
    }

    /// <summary>Converts the 1024-character mixed input to a string.</summary>
    [Benchmark]
    public string Medium_Mixed()
    {
        return Utf8ToAsciiConverter.ToAsciiString(MediumMixed);
    }

    /// <summary>Converts the 100 * 1024-character pure-ASCII input to a string.</summary>
    [Benchmark]
    public string Large_Ascii()
    {
        return Utf8ToAsciiConverter.ToAsciiString(LargeAscii);
    }

    /// <summary>Converts the 100 * 1024-character mixed input to a string.</summary>
    [Benchmark]
    public string Large_Mixed()
    {
        return Utf8ToAsciiConverter.ToAsciiString(LargeMixed);
    }

    /// <summary>Converts the 100 * 1024-character worst-case (Cyrillic) input to a string.</summary>
    [Benchmark]
    public string Large_WorstCase()
    {
        return Utf8ToAsciiConverter.ToAsciiString(LargeWorstCase);
    }

    /// <summary>Converts the 1024-character mixed input to a character array.</summary>
    [Benchmark]
    public char[] CharArray_Medium_Mixed()
    {
        return Utf8ToAsciiConverter.ToAsciiCharArray(MediumMixed);
    }
}
|
||||
Reference in New Issue
Block a user