From d9a31d337a92cdd90128127430047c5e38df875e Mon Sep 17 00:00:00 2001 From: Henrik Date: Mon, 7 Apr 2025 16:56:59 +0200 Subject: [PATCH] Avoid allocating strings for parsing comma separated int values (#18199) --- .../Extensions/StringExtensions.cs | 18 +- .../Services/PublicAccessService.cs | 2 +- src/Umbraco.Core/Services/UserService.cs | 2 +- .../StringExtensionsBenchmarks.cs | 257 ++++++++++++++++++ 4 files changed, 272 insertions(+), 7 deletions(-) create mode 100644 tests/Umbraco.Tests.Benchmarks/StringExtensionsBenchmarks.cs diff --git a/src/Umbraco.Core/Extensions/StringExtensions.cs b/src/Umbraco.Core/Extensions/StringExtensions.cs index b7f066330c..eab9d3fabf 100644 --- a/src/Umbraco.Core/Extensions/StringExtensions.cs +++ b/src/Umbraco.Core/Extensions/StringExtensions.cs @@ -5,6 +5,7 @@ using System.ComponentModel; using System.ComponentModel.DataAnnotations; using System.Diagnostics.CodeAnalysis; using System.Globalization; +using System.Runtime.InteropServices; using System.Security.Cryptography; using System.Text; using System.Text.RegularExpressions; @@ -57,17 +58,24 @@ public static class StringExtensions /// public static int[] GetIdsFromPathReversed(this string path) { - string[] pathSegments = path.Split(Constants.CharArrays.Comma, StringSplitOptions.RemoveEmptyEntries); - List nodeIds = new(pathSegments.Length); - for (int i = pathSegments.Length - 1; i >= 0; i--) + ReadOnlySpan pathSpan = path.AsSpan(); + List nodeIds = []; + foreach (Range rangeOfPathSegment in pathSpan.Split(Constants.CharArrays.Comma)) { - if (int.TryParse(pathSegments[i], NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment)) + if (int.TryParse(pathSpan[rangeOfPathSegment], NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment)) { nodeIds.Add(pathSegment); } } - return nodeIds.ToArray(); + var result = new int[nodeIds.Count]; + var resultIndex = 0; + for (int i = nodeIds.Count - 1; i >= 0; i--) + { + result[resultIndex++] = nodeIds[i]; + } + + return result; } /// diff --git a/src/Umbraco.Core/Services/PublicAccessService.cs b/src/Umbraco.Core/Services/PublicAccessService.cs index 24bdbc78c0..6919ab8d3f 100644 --- a/src/Umbraco.Core/Services/PublicAccessService.cs +++ b/src/Umbraco.Core/Services/PublicAccessService.cs @@ -14,7 +14,7 @@ using Umbraco.Extensions; namespace Umbraco.Cms.Core.Services; -internal class PublicAccessService : RepositoryService, IPublicAccessService +internal sealed class PublicAccessService : RepositoryService, IPublicAccessService { private readonly IPublicAccessRepository _publicAccessRepository; private readonly IEntityService _entityService; diff --git a/src/Umbraco.Core/Services/UserService.cs b/src/Umbraco.Core/Services/UserService.cs index c392011eca..02a1337007 100644 --- a/src/Umbraco.Core/Services/UserService.cs +++ b/src/Umbraco.Core/Services/UserService.cs @@ -2617,7 +2617,7 @@ internal partial class UserService : RepositoryService, IUserService { if (pathIds.Length == 0) { - return new EntityPermissionCollection(Enumerable.Empty()); + return new EntityPermissionCollection([]); } // get permissions for all nodes in the path by group diff --git a/tests/Umbraco.Tests.Benchmarks/StringExtensionsBenchmarks.cs b/tests/Umbraco.Tests.Benchmarks/StringExtensionsBenchmarks.cs new file mode 100644 index 0000000000..4f4a73c2c6 --- /dev/null +++ b/tests/Umbraco.Tests.Benchmarks/StringExtensionsBenchmarks.cs @@ -0,0 +1,257 @@ +using System.Globalization; +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; +using Umbraco.Cms.Core; + +namespace Umbraco.Tests.Benchmarks; + +[MemoryDiagnoser] +public class StringExtensionsBenchmarks +{ + private static readonly Random _seededRandom = new(60); + private const int Size = 100; + private static readonly string[] _stringsWithCommaSeparatedNumbers = new string[Size]; + + static StringExtensionsBenchmarks() + { + for (var i = 0; i < Size; i++) + { + int countOfNumbers = _seededRandom.Next(2, 10); // guess on path lengths in normal use + int[] randomIds = new int[countOfNumbers]; + for (var i1 = 0; i1 < countOfNumbers; i1++) + { + randomIds[i1] = _seededRandom.Next(-1, int.MaxValue); + } + + _stringsWithCommaSeparatedNumbers[i] = string.Join(',', randomIds); + } + } + + /// + /// Ye olden way of doing it (before 20250201 https://github.com/umbraco/Umbraco-CMS/pull/18048) + /// + /// A number so the compiler/runtime doesn't optimize it away. + [Benchmark] + public int Linq() + { + var totalNumberOfIds = 0; // a number to operate on so it is not optimized away + foreach (string? stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers) + { + totalNumberOfIds += Linq(stringWithCommaSeparatedNumbers).Length; + } + + return totalNumberOfIds; + } + + private static int[] Linq(string path) + { + int[]? nodeIds = path.Split(Constants.CharArrays.Comma, StringSplitOptions.RemoveEmptyEntries) + .Select(x => + int.TryParse(x, NumberStyles.Integer, CultureInfo.InvariantCulture, out var output) + ? Attempt.Succeed(output) + : Attempt.Fail()) + .Where(x => x.Success) + .Select(x => x.Result) + .Reverse() + .ToArray(); + return nodeIds; + } + + /// + /// Here we are allocating strings to the separated values, + /// BUT we know the count of numbers, so we can allocate the exact size of list we need + /// + /// A number so the compiler/runtime doesn't optimize it away. + [Benchmark] + public int SplitToHeapStrings() + { + var totalNumberOfIds = 0; // a number to operate on so it is not optimized away + foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers) + { + totalNumberOfIds += SplitToHeapStrings(stringWithCommaSeparatedNumbers).Length; + } + + return totalNumberOfIds; + } + + private static int[] SplitToHeapStrings(string path) + { + string[] pathSegments = path.Split(Constants.CharArrays.Comma, StringSplitOptions.RemoveEmptyEntries); + List nodeIds = new(pathSegments.Length); // here we know how large the resulting list should at least be + for (int i = pathSegments.Length - 1; i >= 0; i--) + { + if (int.TryParse(pathSegments[i], NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment)) + { + nodeIds.Add(pathSegment); + } + } + + return nodeIds.ToArray(); // allocates a new array + } + + /// + /// Here we avoid allocating strings to the separated values, + /// BUT we do not know the count of numbers, so we might end up resizing the list we add numbers to it + /// + /// A number so the compiler/runtime doesn't optimize it away. + [Benchmark] + public int SplitToStackSpansWithoutEmptyCheckReversingListAsSpan() + { + var totalNumberOfIds = 0; // a number to operate on so it is not optimized away + foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers) + { + totalNumberOfIds += SplitToStackSpansWithoutEmptyCheckReversingListAsSpan(stringWithCommaSeparatedNumbers).Length; + } + + return totalNumberOfIds; + } + + private static int[] SplitToStackSpansWithoutEmptyCheckReversingListAsSpan(string path) + { + ReadOnlySpan pathSpan = path.AsSpan(); + MemoryExtensions.SpanSplitEnumerator pathSegments = pathSpan.Split(Constants.CharArrays.Comma); + + // Here we do NOT know how large the resulting list should at least be + // Default empty List<> internal array capacity on add is currently 4 + // If the count of numbers are less than 4, we overallocate a little + // If the count of numbers are more than 4, the list will be resized, resulting in a copy from initial array to new double size array + // If the count of numbers are more than 8, another new array is allocated and copied to + List nodeIds = []; + foreach (Range rangeOfPathSegment in pathSegments) + { + // this is only a span of the string, a string is not allocated on the heap + ReadOnlySpan pathSegmentSpan = pathSpan[rangeOfPathSegment]; + if (int.TryParse(pathSegmentSpan, NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment)) + { + nodeIds.Add(pathSegment); + } + } + + Span nodeIdsSpan = CollectionsMarshal.AsSpan(nodeIds); + var result = new int[nodeIdsSpan.Length]; + var resultIndex = 0; + for (int i = nodeIdsSpan.Length - 1; i >= 0; i--) + { + result[resultIndex++] = nodeIdsSpan[i]; + } + + return result; + } + + /// + /// Here we avoid allocating strings to the separated values, + /// BUT we do not know the count of numbers, so we might end up resizing the list we add numbers to it + /// + /// A number so the compiler/runtime doesn't optimize it away. + [Benchmark] + public int SplitToStackSpansWithoutEmptyCheck() + { + var totalNumberOfIds = 0; // a number to operate on so it is not optimized away + foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers) + { + totalNumberOfIds += SplitToStackSpansWithoutEmptyCheck(stringWithCommaSeparatedNumbers).Length; + } + + return totalNumberOfIds; + } + + private static int[] SplitToStackSpansWithoutEmptyCheck(string path) + { + ReadOnlySpan pathSpan = path.AsSpan(); + MemoryExtensions.SpanSplitEnumerator pathSegments = pathSpan.Split(Constants.CharArrays.Comma); + + // Here we do NOT know how large the resulting list should at least be + // Default empty List<> internal array capacity on add is currently 4 + // If the count of numbers are less than 4, we overallocate a little + // If the count of numbers are more than 4, the list will be resized, resulting in a copy from initial array to new double size array + // If the count of numbers are more than 8, another new array is allocated and copied to + List nodeIds = []; + foreach (Range rangeOfPathSegment in pathSegments) + { + // this is only a span of the string, a string is not allocated on the heap + ReadOnlySpan pathSegmentSpan = pathSpan[rangeOfPathSegment]; + if (int.TryParse(pathSegmentSpan, NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment)) + { + nodeIds.Add(pathSegment); + } + } + + var result = new int[nodeIds.Count]; + var resultIndex = 0; + for (int i = nodeIds.Count - 1; i >= 0; i--) + { + result[resultIndex++] = nodeIds[i]; + } + + return result; + } + + /// + /// Here we avoid allocating strings to the separated values, + /// BUT we do not know the count of numbers, so we might end up resizing the list we add numbers to it + /// + /// Here with an empty check, unlikely in umbraco use case. + /// A number so the compiler/runtime doesn't optimize it away. + [Benchmark] + public int SplitToStackSpansWithEmptyCheck() + { + var totalNumberOfIds = 0; // a number to operate on so it is not optimized away + foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers) + { + totalNumberOfIds += SplitToStackSpansWithEmptyCheck(stringWithCommaSeparatedNumbers).Length; + } + + return totalNumberOfIds; + } + + private static int[] SplitToStackSpansWithEmptyCheck(string path) + { + ReadOnlySpan pathSpan = path.AsSpan(); + MemoryExtensions.SpanSplitEnumerator pathSegments = pathSpan.Split(Constants.CharArrays.Comma); + + // Here we do NOT know how large the resulting list should at least be + // Default empty List<> internal array capacity on add is currently 4 + // If the count of numbers are less than 4, we overallocate a little + // If the count of numbers are more than 4, the list will be resized, resulting in a copy from initial array to new double size array + // If the count of numbers are more than 8, another new array is allocated and copied to + List nodeIds = []; + foreach (Range rangeOfPathSegment in pathSegments) + { + // this is only a span of the string, a string is not allocated on the heap + ReadOnlySpan pathSegmentSpan = pathSpan[rangeOfPathSegment]; + if (pathSegmentSpan.IsEmpty) + { + continue; + } + + if (int.TryParse(pathSegmentSpan, NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment)) + { + nodeIds.Add(pathSegment); + } + } + + var result = new int[nodeIds.Count]; + var resultIndex = 0; + for (int i = nodeIds.Count - 1; i >= 0; i--) + { + result[resultIndex++] = nodeIds[i]; + } + + return result; + } + +// BenchmarkDotNet v0.14.0, Windows 11 (10.0.26100.2894) +// Intel Core i7-10750H CPU 2.60GHz, 1 CPU, 12 logical and 6 physical cores +// .NET Core SDK 3.1.426 [C:\Program Files\dotnet\sdk] +// [Host] : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2 +// +// Toolchain=InProcessEmitToolchain +// +// | Method | Mean | Error | StdDev | Gen0 | Allocated | +// |------------------------------------------------------ |---------:|---------:|---------:|-------:|----------:| +// | Linq | 46.39 us | 0.515 us | 0.430 us | 9.4604 | 58.31 KB | +// | SplitToHeapStrings | 30.28 us | 0.310 us | 0.275 us | 7.0801 | 43.55 KB | +// | SplitToStackSpansWithoutEmptyCheckReversingListAsSpan | 20.47 us | 0.290 us | 0.257 us | 2.7161 | 16.73 KB | +// | SplitToStackSpansWithoutEmptyCheck | 20.60 us | 0.315 us | 0.280 us | 2.7161 | 16.73 KB | +// | SplitToStackSpansWithEmptyCheck | 20.57 us | 0.308 us | 0.288 us | 2.7161 | 16.73 KB | +}