From e75c9d2273232e7c4419172c0763cc18b483b494 Mon Sep 17 00:00:00 2001 From: Shannon Date: Fri, 3 Jul 2020 00:26:55 +1000 Subject: [PATCH] Interns strings for aliases, etc... for when content is deserialized from the contentNu table so we aren't duplicating strings when cold booting. --- .../AutoInterningStringConverter.cs | 38 +++++++++++ ...ngKeyCaseInsensitiveDictionaryConverter.cs | 54 +++++++++++++++ .../CaseInsensitiveDictionaryConverter.cs | 14 +++- src/Umbraco.Core/Umbraco.Core.csproj | 2 + .../AutoInterningStringConverterTests.cs | 67 +++++++++++++++++++ src/Umbraco.Tests/Umbraco.Tests.csproj | 1 + ....DictionaryOfCultureVariationSerializer.cs | 9 ++- ...Tree.DictionaryOfPropertyDataSerializer.cs | 4 +- .../NuCache/DataSource/ContentNestedData.cs | 4 +- .../NuCache/DataSource/PropertyData.cs | 4 +- .../NuCache/DataSource/SerializerBase.cs | 6 +- 11 files changed, 193 insertions(+), 10 deletions(-) create mode 100644 src/Umbraco.Core/Serialization/AutoInterningStringConverter.cs create mode 100644 src/Umbraco.Core/Serialization/AutoInterningStringKeyCaseInsensitiveDictionaryConverter.cs create mode 100644 src/Umbraco.Tests/Serialization/AutoInterningStringConverterTests.cs diff --git a/src/Umbraco.Core/Serialization/AutoInterningStringConverter.cs b/src/Umbraco.Core/Serialization/AutoInterningStringConverter.cs new file mode 100644 index 0000000000..2d1f12f5da --- /dev/null +++ b/src/Umbraco.Core/Serialization/AutoInterningStringConverter.cs @@ -0,0 +1,38 @@ +using System; +using System.Collections; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; + +namespace Umbraco.Core.Serialization +{ + + /// + /// When applied to a string or string collection field will ensure the deserialized strings are interned + /// + /// + /// Borrowed from https://stackoverflow.com/a/34906004/694494 + /// On the same page an interesting approach of using a local intern pool https://stackoverflow.com/a/39605620/694494 which re-uses .NET System.Xml.NameTable + /// + internal class AutoInterningStringConverter : JsonConverter + { + public override bool CanConvert(Type objectType) + { + // CanConvert is not called when a converter is applied directly to a property. + throw new NotImplementedException($"{nameof(AutoInterningStringConverter)} should not be used globally"); + } + + public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer) + { + if (reader.TokenType == JsonToken.Null) + return null; + // Check is in case the value is a non-string literal such as an integer. + var s = reader.TokenType == JsonToken.String + ? string.Intern((string)reader.Value) + : string.Intern((string)JToken.Load(reader)); + return s; + } + + public override bool CanWrite => false; + public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer) => throw new NotImplementedException(); + } +} diff --git a/src/Umbraco.Core/Serialization/AutoInterningStringKeyCaseInsensitiveDictionaryConverter.cs b/src/Umbraco.Core/Serialization/AutoInterningStringKeyCaseInsensitiveDictionaryConverter.cs new file mode 100644 index 0000000000..2076462f0c --- /dev/null +++ b/src/Umbraco.Core/Serialization/AutoInterningStringKeyCaseInsensitiveDictionaryConverter.cs @@ -0,0 +1,54 @@ +using System; +using System.Collections; +using System.Collections.Generic; +using Newtonsoft.Json; +using Newtonsoft.Json.Converters; + +namespace Umbraco.Core.Serialization +{ + /// + /// When applied to a dictionary with a string key, will ensure the deserialized string keys are interned + /// + /// + /// + /// borrowed from https://stackoverflow.com/a/36116462/694494 + /// + internal class AutoInterningStringKeyCaseInsensitiveDictionaryConverter : CaseInsensitiveDictionaryConverter + { + public AutoInterningStringKeyCaseInsensitiveDictionaryConverter() + { + } + public AutoInterningStringKeyCaseInsensitiveDictionaryConverter(StringComparer comparer) : base(comparer) + { + } + + public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer) + { + if (reader.TokenType == JsonToken.StartObject) + { + var dictionary = new Dictionary(); + while (reader.Read()) + { + switch (reader.TokenType) + { + case JsonToken.PropertyName: + var key = string.Intern(reader.Value.ToString()); + + if (!reader.Read()) + throw new Exception("Unexpected end when reading object."); + + var v = serializer.Deserialize(reader); + dictionary[key] = v; + break; + case JsonToken.Comment: + break; + case JsonToken.EndObject: + return dictionary; + } + } + } + return null; + } + + } +} diff --git a/src/Umbraco.Core/Serialization/CaseInsensitiveDictionaryConverter.cs b/src/Umbraco.Core/Serialization/CaseInsensitiveDictionaryConverter.cs index a92d562a52..d5cbc0da31 100644 --- a/src/Umbraco.Core/Serialization/CaseInsensitiveDictionaryConverter.cs +++ b/src/Umbraco.Core/Serialization/CaseInsensitiveDictionaryConverter.cs @@ -14,12 +14,24 @@ namespace Umbraco.Core.Serialization /// public class CaseInsensitiveDictionaryConverter : CustomCreationConverter { + private readonly StringComparer _comparer; + + public CaseInsensitiveDictionaryConverter() + : this(StringComparer.OrdinalIgnoreCase) + { + } + + public CaseInsensitiveDictionaryConverter(StringComparer comparer) + { + _comparer = comparer ?? throw new ArgumentNullException(nameof(comparer)); + } + public override bool CanWrite => false; public override bool CanRead => true; public override bool CanConvert(Type objectType) => typeof(IDictionary).IsAssignableFrom(objectType); - public override IDictionary Create(Type objectType) => new Dictionary(StringComparer.OrdinalIgnoreCase); + public override IDictionary Create(Type objectType) => new Dictionary(_comparer); } } diff --git a/src/Umbraco.Core/Umbraco.Core.csproj b/src/Umbraco.Core/Umbraco.Core.csproj index b2a8ea2a6d..c5a953631a 100755 --- a/src/Umbraco.Core/Umbraco.Core.csproj +++ b/src/Umbraco.Core/Umbraco.Core.csproj @@ -139,6 +139,8 @@ + + diff --git a/src/Umbraco.Tests/Serialization/AutoInterningStringConverterTests.cs b/src/Umbraco.Tests/Serialization/AutoInterningStringConverterTests.cs new file mode 100644 index 0000000000..bf99b68077 --- /dev/null +++ b/src/Umbraco.Tests/Serialization/AutoInterningStringConverterTests.cs @@ -0,0 +1,67 @@ +using Newtonsoft.Json; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using System.Xml.Serialization; +using Umbraco.Core.Serialization; + +namespace Umbraco.Tests.Serialization +{ + [TestFixture] + public class AutoInterningStringConverterTests + { + [Test] + public void Intern_Property_String() + { + var str1 = "Hello"; + var obj = new Test + { + Name = str1 + " " + "there" + }; + + // ensure the raw value is not interned + Assert.IsNull(string.IsInterned(obj.Name)); + + var serialized = JsonConvert.SerializeObject(obj); + obj = JsonConvert.DeserializeObject(serialized); + + Assert.IsNotNull(string.IsInterned(obj.Name)); + } + + [Test] + public void Intern_Property_Dictionary() + { + var str1 = "key"; + var obj = new Test + { + Values = new Dictionary + { + [str1 + "1"] = 0, + [str1 + "2"] = 1 + } + }; + + // ensure the raw value is not interned + Assert.IsNull(string.IsInterned(obj.Values.Keys.First())); + Assert.IsNull(string.IsInterned(obj.Values.Keys.Last())); + + var serialized = JsonConvert.SerializeObject(obj); + obj = JsonConvert.DeserializeObject(serialized); + + Assert.IsNotNull(string.IsInterned(obj.Values.Keys.First())); + Assert.IsNotNull(string.IsInterned(obj.Values.Keys.Last())); + } + + public class Test + { + [JsonConverter(typeof(AutoInterningStringConverter))] + public string Name { get; set; } + + [JsonConverter(typeof(AutoInterningStringKeyCaseInsensitiveDictionaryConverter))] + public Dictionary Values = new Dictionary(); + } + } +} diff --git a/src/Umbraco.Tests/Umbraco.Tests.csproj b/src/Umbraco.Tests/Umbraco.Tests.csproj index 731dc05363..d2ee8e83df 100644 --- a/src/Umbraco.Tests/Umbraco.Tests.csproj +++ b/src/Umbraco.Tests/Umbraco.Tests.csproj @@ -158,6 +158,7 @@ + diff --git a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/BTree.DictionaryOfCultureVariationSerializer.cs b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/BTree.DictionaryOfCultureVariationSerializer.cs index 4521311302..1fcbdbdae7 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/BTree.DictionaryOfCultureVariationSerializer.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/BTree.DictionaryOfCultureVariationSerializer.cs @@ -18,8 +18,13 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource var dict = new Dictionary(StringComparer.InvariantCultureIgnoreCase); for (var i = 0; i < pcount; i++) { - var languageId = PrimitiveSerializer.String.ReadFrom(stream); - var cultureVariation = new CultureVariation { Name = ReadStringObject(stream), UrlSegment = ReadStringObject(stream), Date = ReadDateTime(stream) }; + var languageId = string.Intern(PrimitiveSerializer.String.ReadFrom(stream)); + var cultureVariation = new CultureVariation + { + Name = ReadStringObject(stream), + UrlSegment = ReadStringObject(stream), + Date = ReadDateTime(stream) + }; dict[languageId] = cultureVariation; } return dict; diff --git a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/BTree.DictionaryOfPropertyDataSerializer.cs b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/BTree.DictionaryOfPropertyDataSerializer.cs index 15c6a9f1bd..e3fa6597e4 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/BTree.DictionaryOfPropertyDataSerializer.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/BTree.DictionaryOfPropertyDataSerializer.cs @@ -38,8 +38,8 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource // the 'current' value, and string.Empty should be used to represent the invariant or // neutral values - PropertyData throws when getting nulls, so falling back to // string.Empty here - what else? - pdata.Culture = string.Intern(ReadStringObject(stream)) ?? string.Empty; - pdata.Segment = string.Intern(ReadStringObject(stream)) ?? string.Empty; + pdata.Culture = ReadStringObject(stream, true) ?? string.Empty; + pdata.Segment = ReadStringObject(stream, true) ?? string.Empty; pdata.Value = ReadObject(stream); } diff --git a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/ContentNestedData.cs b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/ContentNestedData.cs index ec5424ad9a..751644c715 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/ContentNestedData.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/ContentNestedData.cs @@ -11,11 +11,11 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource { //dont serialize empty properties [JsonProperty("pd")] - [JsonConverter(typeof(CaseInsensitiveDictionaryConverter))] + [JsonConverter(typeof(AutoInterningStringKeyCaseInsensitiveDictionaryConverter))] public Dictionary PropertyData { get; set; } [JsonProperty("cd")] - [JsonConverter(typeof(CaseInsensitiveDictionaryConverter))] + [JsonConverter(typeof(AutoInterningStringKeyCaseInsensitiveDictionaryConverter))] public Dictionary CultureData { get; set; } [JsonProperty("us")] diff --git a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/PropertyData.cs b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/PropertyData.cs index 4abcbc7e6f..6ccb1dc210 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/PropertyData.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/PropertyData.cs @@ -1,6 +1,7 @@ using System; using System.ComponentModel; using Newtonsoft.Json; +using Umbraco.Core.Serialization; namespace Umbraco.Web.PublishedCache.NuCache.DataSource { @@ -9,6 +10,7 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource private string _culture; private string _segment; + [JsonConverter(typeof(AutoInterningStringConverter))] [DefaultValue("")] [JsonProperty(DefaultValueHandling = DefaultValueHandling.IgnoreAndPopulate, PropertyName = "c")] public string Culture @@ -17,6 +19,7 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource set => _culture = value ?? throw new ArgumentNullException(nameof(value)); // TODO: or fallback to string.Empty? CANNOT be null } + [JsonConverter(typeof(AutoInterningStringConverter))] [DefaultValue("")] [JsonProperty(DefaultValueHandling = DefaultValueHandling.IgnoreAndPopulate, PropertyName = "s")] public string Segment @@ -28,7 +31,6 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource [JsonProperty("v")] public object Value { get; set; } - //Legacy properties used to deserialize existing nucache db entries [JsonProperty("culture")] private string LegacyCulture diff --git a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/SerializerBase.cs b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/SerializerBase.cs index ed17420645..6196be9a3a 100644 --- a/src/Umbraco.Web/PublishedCache/NuCache/DataSource/SerializerBase.cs +++ b/src/Umbraco.Web/PublishedCache/NuCache/DataSource/SerializerBase.cs @@ -23,13 +23,15 @@ namespace Umbraco.Web.PublishedCache.NuCache.DataSource return read(stream); } - protected string ReadStringObject(Stream stream) // required 'cos string is not a struct + protected string ReadStringObject(Stream stream, bool intern = false) // required 'cos string is not a struct { var type = PrimitiveSerializer.Char.ReadFrom(stream); if (type == 'N') return null; if (type != 'S') throw new NotSupportedException($"Cannot deserialize type '{type}', expected 'S'."); - return PrimitiveSerializer.String.ReadFrom(stream); + return intern + ? string.Intern(PrimitiveSerializer.String.ReadFrom(stream)) + : PrimitiveSerializer.String.ReadFrom(stream); } protected int? ReadIntObject(Stream stream) => ReadObject(stream, 'I', ReadInt);