perf(strings): establish Utf8ToAsciiConverter baseline benchmarks

This commit is contained in:
2025-12-12 23:11:24 +00:00
parent 475010148b
commit 610976c41c
3 changed files with 159 additions and 0 deletions

View File

@@ -0,0 +1,44 @@
# Utf8ToAsciiConverter Baseline Benchmarks
**Date:** 2025-11-27
**Implementation:** Original 3,631-line switch statement
**Runtime:** .NET 10.0
## Results
```
BenchmarkDotNet v0.15.6, Linux Ubuntu 25.10 (Questing Quokka)
Intel Xeon CPU 2.80GHz, 1 CPU, 16 logical and 8 physical cores
.NET SDK 10.0.100
[Host] : .NET 10.0.0 (10.0.0, 10.0.25.52411), X64 RyuJIT x86-64-v4
DefaultJob : .NET 10.0.0 (10.0.0, 10.0.25.52411), X64 RyuJIT x86-64-v4
```
| Method | Mean | Error | StdDev | Rank | Gen0 | Gen1 | Gen2 | Allocated |
|----------------------- |----------------:|--------------:|--------------:|-----:|---------:|---------:|---------:|----------:|
| Tiny_Ascii | 82.81 ns | 0.402 ns | 0.314 ns | 2 | 0.0027 | - | - | 48 B |
| Tiny_Mixed | 71.05 ns | 0.225 ns | 0.176 ns | 1 | 0.0027 | - | - | 48 B |
| Small_Ascii | 695.75 ns | 4.394 ns | 3.669 ns | 3 | 0.0124 | - | - | 224 B |
| Small_Mixed | 686.54 ns | 8.868 ns | 8.295 ns | 3 | 0.0124 | - | - | 224 B |
| Medium_Ascii | 5,994.68 ns | 32.905 ns | 30.779 ns | 4 | 0.4730 | - | - | 8240 B |
| Medium_Mixed | 7,116.65 ns | 27.489 ns | 22.955 ns | 5 | 0.4730 | - | - | 8264 B |
| Large_Ascii | 593,733.29 ns | 2,040.378 ns | 1,703.808 ns | 7 | 249.0234 | 249.0234 | 249.0234 | 819332 B |
| Large_Mixed | 1,066,297.43 ns | 8,507.650 ns | 7,958.061 ns | 8 | 248.0469 | 248.0469 | 248.0469 | 823523 B |
| Large_WorstCase | 2,148,169.56 ns | 16,455.374 ns | 15,392.367 ns | 9 | 246.0938 | 246.0938 | 246.0938 | 1024125 B |
| CharArray_Medium_Mixed | 7,357.24 ns | 59.719 ns | 55.861 ns | 6 | 0.5951 | 0.0076 | - | 10336 B |
## Notes
- Baseline before SIMD refactor
- Used as comparison target for Task 7
- Original implementation uses 3,631-line switch statement for character mappings
- Every benchmark allocates a new result on every call (a string, or a char array for CharArray_Medium_Mixed)
- Large_WorstCase (Cyrillic text) is the slowest at ~2.15 ms for a 100 × 1024-character input
## Key Observations
1. **Pure ASCII performance**: 82.81 ns for 10 characters, 594 µs for 100 × 1024 characters
2. **Mixed content performance**: 71.05 ns for 10 characters, 1.07 ms for 100 × 1024 characters
3. **Worst case (Cyrillic)**: 2.15 ms for 100 × 1024 characters (about 2x slower than mixed)
4. **Memory allocation**: Linear with input size, plus overhead for output string
5. **GC pressure**: Significant Gen0/Gen1/Gen2 collections on large inputs

View File

@@ -0,0 +1,63 @@
using System.Text;
namespace Umbraco.Tests.Benchmarks;
/// <summary>
/// Builds pseudo-random text of a requested length for use as benchmark input.
/// Each generator creates its own <c>Random</c> from the same fixed seed, so calling
/// a generator twice with the same length yields the same string.
/// </summary>
public static class BenchmarkTextGenerator
{
    // Fixed seed shared by every generator so that inputs are reproducible.
    private const int Seed = 42;

    private static readonly char[] AsciiAlphaNum =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".ToCharArray();

    private static readonly char[] AsciiPunctuation =
        " .,;:!?-_'\"()".ToCharArray();

    private static readonly char[] LatinAccented =
        "àáâãäåæèéêëìíîïñòóôõöøùúûüýÿÀÁÂÃÄÅÆÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝŸœŒßðÐþÞ".ToCharArray();

    private static readonly char[] Cyrillic =
        "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя".ToCharArray();

    private static readonly char[] Symbols =
        "©®™€£¥°±×÷§¶†‡•".ToCharArray();

    private static readonly char[] WorstCaseCyrillic =
        "ЩЮЯЖЧШщюяжчш".ToCharArray();

    /// <summary>
    /// Generates <paramref name="length"/> characters drawn only from ASCII letters and digits.
    /// </summary>
    /// <param name="length">Number of characters to generate.</param>
    /// <returns>The generated string.</returns>
    public static string GeneratePureAscii(int length)
    {
        return GenerateFromCharset(length, AsciiAlphaNum);
    }

    /// <summary>
    /// Generates <paramref name="length"/> characters from a weighted blend of character sets:
    /// 70% ASCII letters/digits, 15% ASCII punctuation, 10% accented Latin, 4% Cyrillic, 1% symbols.
    /// </summary>
    /// <param name="length">Number of characters to generate.</param>
    /// <returns>The generated string.</returns>
    public static string GenerateMixed(int length)
    {
        var rng = new Random(Seed);
        var builder = new StringBuilder(length);

        // One character is appended per pass, so the builder length doubles as the loop counter.
        while (builder.Length < length)
        {
            // The charset roll must be drawn before the index roll to keep the sequence stable.
            char[] pool = PickMixedCharset(rng.Next(100));
            int index = rng.Next(pool.Length);
            builder.Append(pool[index]);
        }

        return builder.ToString();
    }

    /// <summary>
    /// Generates <paramref name="length"/> characters drawn only from the
    /// <c>WorstCaseCyrillic</c> character set.
    /// </summary>
    /// <param name="length">Number of characters to generate.</param>
    /// <returns>The generated string.</returns>
    public static string GenerateWorstCase(int length)
    {
        return GenerateFromCharset(length, WorstCaseCyrillic);
    }

    // Maps a roll in [0, 100) to the character set used for the next mixed-content character.
    private static char[] PickMixedCharset(int roll)
    {
        if (roll < 70)
        {
            return AsciiAlphaNum;
        }

        if (roll < 85)
        {
            return AsciiPunctuation;
        }

        if (roll < 95)
        {
            return LatinAccented;
        }

        if (roll < 99)
        {
            return Cyrillic;
        }

        return Symbols;
    }

    // Fills a string of the requested length with uniformly chosen characters from charset.
    private static string GenerateFromCharset(int length, char[] charset)
    {
        var rng = new Random(Seed);
        var builder = new StringBuilder(length);

        while (builder.Length < length)
        {
            int index = rng.Next(charset.Length);
            builder.Append(charset[index]);
        }

        return builder.ToString();
    }
}

View File

@@ -0,0 +1,52 @@
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Columns;
using BenchmarkDotNet.Jobs;
using Umbraco.Cms.Core.Strings;
namespace Umbraco.Tests.Benchmarks;
/// <summary>
/// Baseline benchmarks that call <c>Utf8ToAsciiConverter</c> on generated inputs of four
/// sizes (10, 100, 1024 and 100 * 1024 characters) and several content mixes.
/// </summary>
[MemoryDiagnoser]
[RankColumn]
[StatisticalTestColumn]
public class Utf8ToAsciiConverterBaselineBenchmarks
{
    // Input lengths, in characters, for each size tier.
    private const int TinyLength = 10;
    private const int SmallLength = 100;
    private const int MediumLength = 1024;
    private const int LargeLength = 100 * 1024;

    // Inputs are produced by static initializers rather than inside the benchmark methods.
    private static readonly string TinyAsciiInput = BenchmarkTextGenerator.GeneratePureAscii(TinyLength);
    private static readonly string TinyMixedInput = BenchmarkTextGenerator.GenerateMixed(TinyLength);
    private static readonly string SmallAsciiInput = BenchmarkTextGenerator.GeneratePureAscii(SmallLength);
    private static readonly string SmallMixedInput = BenchmarkTextGenerator.GenerateMixed(SmallLength);
    private static readonly string MediumAsciiInput = BenchmarkTextGenerator.GeneratePureAscii(MediumLength);
    private static readonly string MediumMixedInput = BenchmarkTextGenerator.GenerateMixed(MediumLength);
    private static readonly string LargeAsciiInput = BenchmarkTextGenerator.GeneratePureAscii(LargeLength);
    private static readonly string LargeMixedInput = BenchmarkTextGenerator.GenerateMixed(LargeLength);
    private static readonly string LargeWorstCaseInput = BenchmarkTextGenerator.GenerateWorstCase(LargeLength);

    /// <summary>Runs <c>ToAsciiString</c> on the 10-character pure-ASCII input.</summary>
    [Benchmark]
    public string Tiny_Ascii()
    {
        return Utf8ToAsciiConverter.ToAsciiString(TinyAsciiInput);
    }

    /// <summary>Runs <c>ToAsciiString</c> on the 10-character mixed input.</summary>
    [Benchmark]
    public string Tiny_Mixed()
    {
        return Utf8ToAsciiConverter.ToAsciiString(TinyMixedInput);
    }

    /// <summary>Runs <c>ToAsciiString</c> on the 100-character pure-ASCII input.</summary>
    [Benchmark]
    public string Small_Ascii()
    {
        return Utf8ToAsciiConverter.ToAsciiString(SmallAsciiInput);
    }

    /// <summary>Runs <c>ToAsciiString</c> on the 100-character mixed input.</summary>
    [Benchmark]
    public string Small_Mixed()
    {
        return Utf8ToAsciiConverter.ToAsciiString(SmallMixedInput);
    }

    /// <summary>Runs <c>ToAsciiString</c> on the 1024-character pure-ASCII input.</summary>
    [Benchmark]
    public string Medium_Ascii()
    {
        return Utf8ToAsciiConverter.ToAsciiString(MediumAsciiInput);
    }

    /// <summary>Runs <c>ToAsciiString</c> on the 1024-character mixed input.</summary>
    [Benchmark]
    public string Medium_Mixed()
    {
        return Utf8ToAsciiConverter.ToAsciiString(MediumMixedInput);
    }

    /// <summary>Runs <c>ToAsciiString</c> on the 100 * 1024-character pure-ASCII input.</summary>
    [Benchmark]
    public string Large_Ascii()
    {
        return Utf8ToAsciiConverter.ToAsciiString(LargeAsciiInput);
    }

    /// <summary>Runs <c>ToAsciiString</c> on the 100 * 1024-character mixed input.</summary>
    [Benchmark]
    public string Large_Mixed()
    {
        return Utf8ToAsciiConverter.ToAsciiString(LargeMixedInput);
    }

    /// <summary>Runs <c>ToAsciiString</c> on the 100 * 1024-character worst-case Cyrillic input.</summary>
    [Benchmark]
    public string Large_WorstCase()
    {
        return Utf8ToAsciiConverter.ToAsciiString(LargeWorstCaseInput);
    }

    /// <summary>Runs <c>ToAsciiCharArray</c> on the 1024-character mixed input.</summary>
    [Benchmark]
    public char[] CharArray_Medium_Mixed()
    {
        return Utf8ToAsciiConverter.ToAsciiCharArray(MediumMixedInput);
    }
}