From 610976c41ccea1ac19ebedf5f22086a9840975ac Mon Sep 17 00:00:00 2001 From: yv01p Date: Fri, 12 Dec 2025 23:11:24 +0000 Subject: [PATCH] perf(strings): establish Utf8ToAsciiConverter baseline benchmarks --- .../utf8-converter-baseline-2025-11-27.md | 44 +++++++++++++ .../BenchmarkTextGenerator.cs | 63 +++++++++++++++++++ .../Utf8ToAsciiConverterBaselineBenchmarks.cs | 52 +++++++++++++++ 3 files changed, 159 insertions(+) create mode 100644 docs/benchmarks/utf8-converter-baseline-2025-11-27.md create mode 100644 tests/Umbraco.Tests.Benchmarks/BenchmarkTextGenerator.cs create mode 100644 tests/Umbraco.Tests.Benchmarks/Utf8ToAsciiConverterBaselineBenchmarks.cs diff --git a/docs/benchmarks/utf8-converter-baseline-2025-11-27.md b/docs/benchmarks/utf8-converter-baseline-2025-11-27.md new file mode 100644 index 0000000000..0705e5948d --- /dev/null +++ b/docs/benchmarks/utf8-converter-baseline-2025-11-27.md @@ -0,0 +1,44 @@ +# Utf8ToAsciiConverter Baseline Benchmarks + +**Date:** 2025-11-27 +**Implementation:** Original 3,631-line switch statement +**Runtime:** .NET 10.0 + +## Results + +``` +BenchmarkDotNet v0.15.6, Linux Ubuntu 25.10 (Questing Quokka) +Intel Xeon CPU 2.80GHz, 1 CPU, 16 logical and 8 physical cores +.NET SDK 10.0.100 + [Host] : .NET 10.0.0 (10.0.0, 10.0.25.52411), X64 RyuJIT x86-64-v4 + DefaultJob : .NET 10.0.0 (10.0.0, 10.0.25.52411), X64 RyuJIT x86-64-v4 +``` + +| Method | Mean | Error | StdDev | Rank | Gen0 | Gen1 | Gen2 | Allocated | +|----------------------- |----------------:|--------------:|--------------:|-----:|---------:|---------:|---------:|----------:| +| Tiny_Ascii | 82.81 ns | 0.402 ns | 0.314 ns | 2 | 0.0027 | - | - | 48 B | +| Tiny_Mixed | 71.05 ns | 0.225 ns | 0.176 ns | 1 | 0.0027 | - | - | 48 B | +| Small_Ascii | 695.75 ns | 4.394 ns | 3.669 ns | 3 | 0.0124 | - | - | 224 B | +| Small_Mixed | 686.54 ns | 8.868 ns | 8.295 ns | 3 | 0.0124 | - | - | 224 B | +| Medium_Ascii | 5,994.68 ns | 32.905 ns | 30.779 ns | 4 | 0.4730 | - | - | 8240 
B | +| Medium_Mixed | 7,116.65 ns | 27.489 ns | 22.955 ns | 5 | 0.4730 | - | - | 8264 B | +| Large_Ascii | 593,733.29 ns | 2,040.378 ns | 1,703.808 ns | 7 | 249.0234 | 249.0234 | 249.0234 | 819332 B | +| Large_Mixed | 1,066,297.43 ns | 8,507.650 ns | 7,958.061 ns | 8 | 248.0469 | 248.0469 | 248.0469 | 823523 B | +| Large_WorstCase | 2,148,169.56 ns | 16,455.374 ns | 15,392.367 ns | 9 | 246.0938 | 246.0938 | 246.0938 | 1024125 B | +| CharArray_Medium_Mixed | 7,357.24 ns | 59.719 ns | 55.861 ns | 6 | 0.5951 | 0.0076 | - | 10336 B | + +## Notes + +- Baseline before SIMD refactor +- Used as comparison target for Task 7 +- Original implementation uses a 3,631-line switch statement for character mappings +- All benchmarks allocate new strings on every call +- Large_WorstCase (Cyrillic text) is the slowest at ~2.15 ms for the 102,400-character input + +## Key Observations + +1. **Pure ASCII performance**: 82.81 ns for 10 characters, 594 µs for 102,400 characters +2. **Mixed content performance**: 71.05 ns for 10 characters, 1.07 ms for 102,400 characters +3. **Worst case (Cyrillic)**: 2.15 ms for 102,400 characters (about 2x slower than mixed) +4. **Memory allocation**: Linear with input size, plus overhead for output string +5. 
**GC pressure**: Significant Gen0/Gen1/Gen2 collections on large inputs
diff --git a/tests/Umbraco.Tests.Benchmarks/BenchmarkTextGenerator.cs b/tests/Umbraco.Tests.Benchmarks/BenchmarkTextGenerator.cs
new file mode 100644
index 0000000000..a7da843e63
--- /dev/null
+++ b/tests/Umbraco.Tests.Benchmarks/BenchmarkTextGenerator.cs
@@ -0,0 +1,99 @@
+using System.Text;
+
+namespace Umbraco.Tests.Benchmarks;
+
+/// <summary>
+/// Produces deterministic sample text for string benchmarks.
+/// </summary>
+/// <remarks>
+/// Every generator call seeds a new <see cref="Random"/> with the same constant, so a given
+/// method and length always produce the same string.
+/// </remarks>
+public static class BenchmarkTextGenerator
+{
+    // Fixed seed so repeated runs benchmark identical input text.
+    private const int Seed = 42;
+
+    private static readonly char[] AsciiAlphaNum =
+        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".ToCharArray();
+
+    private static readonly char[] AsciiPunctuation =
+        " .,;:!?-_'\"()".ToCharArray();
+
+    private static readonly char[] LatinAccented =
+        "àáâãäåæèéêëìíîïñòóôõöøùúûüýÿÀÁÂÃÄÅÆÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝŸœŒßðÐþÞ".ToCharArray();
+
+    private static readonly char[] Cyrillic =
+        "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя".ToCharArray();
+
+    private static readonly char[] Symbols =
+        "©®™€£¥°±×÷§¶†‡•".ToCharArray();
+
+    private static readonly char[] WorstCaseCyrillic =
+        "ЩЮЯЖЧШщюяжчш".ToCharArray();
+
+    /// <summary>
+    /// Generates a string made only of ASCII letters and digits.
+    /// </summary>
+    /// <param name="length">Number of characters to generate; must not be negative.</param>
+    /// <returns>A string of exactly <paramref name="length"/> characters.</returns>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
+    public static string GeneratePureAscii(int length) =>
+        GenerateFromCharset(length, AsciiAlphaNum);
+
+    /// <summary>
+    /// Generates a string that mixes character sets. Each character is drawn with these odds:
+    /// 70% ASCII letters/digits, 15% ASCII punctuation, 10% accented Latin, 4% Cyrillic, 1% symbols.
+    /// </summary>
+    /// <param name="length">Number of characters to generate; must not be negative.</param>
+    /// <returns>A string of exactly <paramref name="length"/> characters.</returns>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
+    public static string GenerateMixed(int length)
+    {
+        ArgumentOutOfRangeException.ThrowIfNegative(length);
+
+        var random = new Random(Seed);
+        var sb = new StringBuilder(length);
+
+        for (var i = 0; i < length; i++)
+        {
+            // Draw the charset first, then the character: this call order determines the generated text.
+            char[] charset = random.Next(100) switch
+            {
+                < 70 => AsciiAlphaNum,
+                < 85 => AsciiPunctuation,
+                < 95 => LatinAccented,
+                < 99 => Cyrillic,
+                _ => Symbols,
+            };
+            sb.Append(charset[random.Next(charset.Length)]);
+        }
+
+        return sb.ToString();
+    }
+
+    /// <summary>
+    /// Generates a string drawn only from the <c>WorstCaseCyrillic</c> letter set.
+    /// </summary>
+    /// <param name="length">Number of characters to generate; must not be negative.</param>
+    /// <returns>A string of exactly <paramref name="length"/> characters.</returns>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
+    public static string GenerateWorstCase(int length) =>
+        GenerateFromCharset(length, WorstCaseCyrillic);
+
+    // Builds a string of the requested length by picking a random element of charset for each position.
+    private static string GenerateFromCharset(int length, char[] charset)
+    {
+        ArgumentOutOfRangeException.ThrowIfNegative(length);
+
+        var random = new Random(Seed);
+        var sb = new StringBuilder(length);
+
+        for (var i = 0; i < length; i++)
+        {
+            sb.Append(charset[random.Next(charset.Length)]);
+        }
+
+        return sb.ToString();
+    }
+}
diff --git a/tests/Umbraco.Tests.Benchmarks/Utf8ToAsciiConverterBaselineBenchmarks.cs b/tests/Umbraco.Tests.Benchmarks/Utf8ToAsciiConverterBaselineBenchmarks.cs
new file mode 100644
index 0000000000..b9ac47f0f8
--- /dev/null
+++ b/tests/Umbraco.Tests.Benchmarks/Utf8ToAsciiConverterBaselineBenchmarks.cs
@@ -0,0 +1,60 @@
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Columns;
+using BenchmarkDotNet.Jobs;
+using Umbraco.Cms.Core.Strings;
+
+namespace Umbraco.Tests.Benchmarks;
+
+/// <summary>
+/// Baseline benchmarks for <see cref="Utf8ToAsciiConverter"/> over inputs of 10, 100, 1024 and
+/// 100 * 1024 characters.
+/// </summary>
+/// <remarks>
+/// Inputs come from <see cref="BenchmarkTextGenerator"/> and are built once, in static field initializers.
+/// NOTE(review): no benchmark here is marked as the baseline; confirm <c>[StatisticalTestColumn]</c> is wanted.
+/// </remarks>
+[MemoryDiagnoser]
+[RankColumn]
+[StatisticalTestColumn]
+public class Utf8ToAsciiConverterBaselineBenchmarks
+{
+    private static readonly string TinyAscii = BenchmarkTextGenerator.GeneratePureAscii(10);
+    private static readonly string TinyMixed = BenchmarkTextGenerator.GenerateMixed(10);
+    private static readonly string SmallAscii = BenchmarkTextGenerator.GeneratePureAscii(100);
+    private static readonly string SmallMixed = BenchmarkTextGenerator.GenerateMixed(100);
+    private static readonly string MediumAscii = BenchmarkTextGenerator.GeneratePureAscii(1024);
+    private static readonly string MediumMixed = BenchmarkTextGenerator.GenerateMixed(1024);
+    private static readonly string LargeAscii = BenchmarkTextGenerator.GeneratePureAscii(100 * 1024);
+    private static readonly string LargeMixed = BenchmarkTextGenerator.GenerateMixed(100 * 1024);
+    private static readonly string LargeWorstCase = BenchmarkTextGenerator.GenerateWorstCase(100 * 1024);
+
+    [Benchmark]
+    public string Tiny_Ascii() => Utf8ToAsciiConverter.ToAsciiString(TinyAscii);
+
+    [Benchmark]
+    public string Tiny_Mixed() => Utf8ToAsciiConverter.ToAsciiString(TinyMixed);
+
+    [Benchmark]
+    public string Small_Ascii() => Utf8ToAsciiConverter.ToAsciiString(SmallAscii);
+
+    [Benchmark]
+    public string Small_Mixed() => Utf8ToAsciiConverter.ToAsciiString(SmallMixed);
+
+    [Benchmark]
+    public string Medium_Ascii() => Utf8ToAsciiConverter.ToAsciiString(MediumAscii);
+
+    [Benchmark]
+    public string Medium_Mixed() => Utf8ToAsciiConverter.ToAsciiString(MediumMixed);
+
+    [Benchmark]
+    public string Large_Ascii() => Utf8ToAsciiConverter.ToAsciiString(LargeAscii);
+
+    [Benchmark]
+    public string Large_Mixed() => Utf8ToAsciiConverter.ToAsciiString(LargeMixed);
+
+    [Benchmark]
+    public string Large_WorstCase() => Utf8ToAsciiConverter.ToAsciiString(LargeWorstCase);
+
+    [Benchmark]
+    public char[] CharArray_Medium_Mixed() => Utf8ToAsciiConverter.ToAsciiCharArray(MediumMixed);
+}