using System.Globalization; using System.Runtime.InteropServices; using BenchmarkDotNet.Attributes; using Umbraco.Cms.Core; using Umbraco.Extensions; namespace Umbraco.Tests.Benchmarks; [MemoryDiagnoser] public class StringExtensionsBenchmarks { private static readonly Random _seededRandom = new(60); private const int Size = 100; private static readonly string[] _stringsWithCommaSeparatedNumbers = new string[Size]; static StringExtensionsBenchmarks() { for (var i = 0; i < Size; i++) { int countOfNumbers = _seededRandom.Next(2, 10); // guess on path lengths in normal use int[] randomIds = new int[countOfNumbers]; for (var i1 = 0; i1 < countOfNumbers; i1++) { randomIds[i1] = _seededRandom.Next(-1, int.MaxValue); } _stringsWithCommaSeparatedNumbers[i] = string.Join(',', randomIds); } } /// /// Ye olden way of doing it (before 20250201 https://github.com/umbraco/Umbraco-CMS/pull/18048) /// /// A number so the compiler/runtime doesn't optimize it away. [Benchmark] public int Linq() { var totalNumberOfIds = 0; // a number to operate on so it is not optimized away foreach (string? stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers) { totalNumberOfIds += Linq(stringWithCommaSeparatedNumbers).Length; } return totalNumberOfIds; } private static int[] Linq(string path) { int[]? nodeIds = path.Split(Constants.CharArrays.Comma, StringSplitOptions.RemoveEmptyEntries) .Select(x => int.TryParse(x, NumberStyles.Integer, CultureInfo.InvariantCulture, out var output) ? Attempt.Succeed(output) : Attempt.Fail()) .Where(x => x.Success) .Select(x => x.Result) .Reverse() .ToArray(); return nodeIds; } /// /// Here we are allocating strings to the separated values, /// BUT we know the count of numbers, so we can allocate the exact size of list we need /// /// A number so the compiler/runtime doesn't optimize it away. [Benchmark] public int SplitToHeapStrings() { var totalNumberOfIds = 0; // a number to operate on so it is not optimized away foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers) { totalNumberOfIds += SplitToHeapStrings(stringWithCommaSeparatedNumbers).Length; } return totalNumberOfIds; } private static int[] SplitToHeapStrings(string path) { string[] pathSegments = path.Split(Constants.CharArrays.Comma, StringSplitOptions.RemoveEmptyEntries); List nodeIds = new(pathSegments.Length); // here we know how large the resulting list should at least be for (int i = pathSegments.Length - 1; i >= 0; i--) { if (int.TryParse(pathSegments[i], NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment)) { nodeIds.Add(pathSegment); } } return nodeIds.ToArray(); // allocates a new array } /// /// Here we avoid allocating strings to the separated values, /// BUT we do not know the count of numbers, so we might end up resizing the list we add numbers to it /// /// A number so the compiler/runtime doesn't optimize it away. [Benchmark] public int SplitToStackSpansWithoutEmptyCheckReversingListAsSpan() { var totalNumberOfIds = 0; // a number to operate on so it is not optimized away foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers) { totalNumberOfIds += SplitToStackSpansWithoutEmptyCheckReversingListAsSpan(stringWithCommaSeparatedNumbers).Length; } return totalNumberOfIds; } private static int[] SplitToStackSpansWithoutEmptyCheckReversingListAsSpan(string path) { ReadOnlySpan pathSpan = path.AsSpan(); MemoryExtensions.SpanSplitEnumerator pathSegments = pathSpan.Split(Constants.CharArrays.Comma); // Here we do NOT know how large the resulting list should at least be // Default empty List<> internal array capacity on add is currently 4 // If the count of numbers are less than 4, we overallocate a little // If the count of numbers are more than 4, the list will be resized, resulting in a copy from initial array to new double size array // If the count of numbers are more than 8, another new array is allocated and copied to List nodeIds = []; foreach (Range rangeOfPathSegment in pathSegments) { // this is only a span of the string, a string is not allocated on the heap ReadOnlySpan pathSegmentSpan = pathSpan[rangeOfPathSegment]; if (int.TryParse(pathSegmentSpan, NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment)) { nodeIds.Add(pathSegment); } } Span nodeIdsSpan = CollectionsMarshal.AsSpan(nodeIds); var result = new int[nodeIdsSpan.Length]; var resultIndex = 0; for (int i = nodeIdsSpan.Length - 1; i >= 0; i--) { result[resultIndex++] = nodeIdsSpan[i]; } return result; } /// /// Here we avoid allocating strings to the separated values, /// BUT we do not know the count of numbers, so we might end up resizing the list we add numbers to it /// /// A number so the compiler/runtime doesn't optimize it away. [Benchmark] public int SplitToStackSpansWithoutEmptyCheck() { var totalNumberOfIds = 0; // a number to operate on so it is not optimized away foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers) { totalNumberOfIds += SplitToStackSpansWithoutEmptyCheck(stringWithCommaSeparatedNumbers).Length; } return totalNumberOfIds; } private static int[] SplitToStackSpansWithoutEmptyCheck(string path) { ReadOnlySpan pathSpan = path.AsSpan(); MemoryExtensions.SpanSplitEnumerator pathSegments = pathSpan.Split(Constants.CharArrays.Comma); // Here we do NOT know how large the resulting list should at least be // Default empty List<> internal array capacity on add is currently 4 // If the count of numbers are less than 4, we overallocate a little // If the count of numbers are more than 4, the list will be resized, resulting in a copy from initial array to new double size array // If the count of numbers are more than 8, another new array is allocated and copied to List nodeIds = []; foreach (Range rangeOfPathSegment in pathSegments) { // this is only a span of the string, a string is not allocated on the heap ReadOnlySpan pathSegmentSpan = pathSpan[rangeOfPathSegment]; if (int.TryParse(pathSegmentSpan, NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment)) { nodeIds.Add(pathSegment); } } var result = new int[nodeIds.Count]; var resultIndex = 0; for (int i = nodeIds.Count - 1; i >= 0; i--) { result[resultIndex++] = nodeIds[i]; } return result; } /// /// Here we avoid allocating strings to the separated values, /// BUT we do not know the count of numbers, so we might end up resizing the list we add numbers to it /// /// Here with an empty check, unlikely in umbraco use case. /// A number so the compiler/runtime doesn't optimize it away. [Benchmark] public int SplitToStackSpansWithEmptyCheck() { var totalNumberOfIds = 0; // a number to operate on so it is not optimized away foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers) { totalNumberOfIds += SplitToStackSpansWithEmptyCheck(stringWithCommaSeparatedNumbers).Length; } return totalNumberOfIds; } private static int[] SplitToStackSpansWithEmptyCheck(string path) { ReadOnlySpan pathSpan = path.AsSpan(); MemoryExtensions.SpanSplitEnumerator pathSegments = pathSpan.Split(Constants.CharArrays.Comma); // Here we do NOT know how large the resulting list should at least be // Default empty List<> internal array capacity on add is currently 4 // If the count of numbers are less than 4, we overallocate a little // If the count of numbers are more than 4, the list will be resized, resulting in a copy from initial array to new double size array // If the count of numbers are more than 8, another new array is allocated and copied to List nodeIds = []; foreach (Range rangeOfPathSegment in pathSegments) { // this is only a span of the string, a string is not allocated on the heap ReadOnlySpan pathSegmentSpan = pathSpan[rangeOfPathSegment]; if (pathSegmentSpan.IsEmpty) { continue; } if (int.TryParse(pathSegmentSpan, NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment)) { nodeIds.Add(pathSegment); } } var result = new int[nodeIds.Count]; var resultIndex = 0; for (int i = nodeIds.Count - 1; i >= 0; i--) { result[resultIndex++] = nodeIds[i]; } return result; } // BenchmarkDotNet v0.14.0, Windows 11 (10.0.26100.2894) // Intel Core i7-10750H CPU 2.60GHz, 1 CPU, 12 logical and 6 physical cores // .NET Core SDK 3.1.426 [C:\Program Files\dotnet\sdk] // [Host] : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2 // // Toolchain=InProcessEmitToolchain // // | Method | Mean | Error | StdDev | Gen0 | Allocated | // |------------------------------------------------------ |---------:|---------:|---------:|-------:|----------:| // | Linq | 46.39 us | 0.515 us | 0.430 us | 9.4604 | 58.31 KB | // | SplitToHeapStrings | 30.28 us | 0.310 us | 0.275 us | 7.0801 | 43.55 KB | // | SplitToStackSpansWithoutEmptyCheckReversingListAsSpan | 20.47 us | 0.290 us | 0.257 us | 2.7161 | 16.73 KB | // | SplitToStackSpansWithoutEmptyCheck | 20.60 us | 0.315 us | 0.280 us | 2.7161 | 16.73 KB | // | SplitToStackSpansWithEmptyCheck | 20.57 us | 0.308 us | 0.288 us | 2.7161 | 16.73 KB | private const string HtmlTestString = "

Hello world

"; private const string WhitespaceTestString = "Hello world\t\ntest string"; private const string FilePathTestString = "path/to/some/file.extension?query=param"; private const string NonAlphanumericTestString = "hello-world_test!@#$%^&*()123"; [Benchmark] public string StripWhitespace_Benchmark() => WhitespaceTestString.StripWhitespace(); [Benchmark] public string GetFileExtension_Benchmark() => FilePathTestString.GetFileExtension(); [Benchmark] public string StripHtml_Benchmark() => HtmlTestString.StripHtml(); [Benchmark] public bool IsLowerCase_Benchmark() => 'a'.IsLowerCase(); [Benchmark] public bool IsUpperCase_Benchmark() => 'A'.IsUpperCase(); [Benchmark] public string ReplaceNonAlphanumericChars_String_Benchmark() => NonAlphanumericTestString.ReplaceNonAlphanumericChars("-"); }