2020-08-13 23:09:40 +10:00
using K4os.Compression.LZ4 ;
using MessagePack ;
2020-07-03 13:30:40 +10:00
using MessagePack.Resolvers ;
2020-07-03 12:11:05 +10:00
using System ;
2020-08-13 23:09:40 +10:00
using System.Linq ;
using System.Text ;
2021-01-28 13:50:18 +11:00
using Umbraco.Core.Models ;
2020-09-24 18:37:24 +10:00
using Umbraco.Core.PropertyEditors ;
2020-07-03 12:11:05 +10:00
namespace Umbraco.Web.PublishedCache.NuCache.DataSource
{
2020-09-25 00:32:11 +10:00
2020-08-13 22:15:09 +10:00
/// <summary>
2020-09-25 00:32:11 +10:00
/// Serializes/Deserializes <see cref="ContentCacheDataModel"/> document to the SQL Database as bytes using MessagePack
2020-08-13 22:15:09 +10:00
/// </summary>
2020-09-25 00:32:11 +10:00
public class MsgPackContentNestedDataSerializer : IContentCacheDataSerializer
2020-07-03 12:11:05 +10:00
{
2020-09-25 00:32:11 +10:00
private readonly MessagePackSerializerOptions _options ;
2021-01-28 13:50:18 +11:00
private readonly IPropertyCacheCompression _propertyOptions ;
2020-07-03 12:11:05 +10:00
2021-01-28 13:50:18 +11:00
/// <summary>
/// Initializes a new instance of the <see cref="MsgPackContentNestedDataSerializer"/> class.
/// </summary>
/// <param name="propertyOptions">
/// Consulted per property alias to decide whether that property's values are stored compressed.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="propertyOptions"/> is <c>null</c>.</exception>
public MsgPackContentNestedDataSerializer(IPropertyCacheCompression propertyOptions)
{
    if (propertyOptions == null)
    {
        throw new ArgumentNullException(nameof(propertyOptions));
    }

    _propertyOptions = propertyOptions;

    // Start from the contractless options so the cache model needs no MessagePack attributes.
    var contractlessOptions = ContractlessStandardResolver.Options;

    // TODO: We want to be able to intern the strings for aliases when deserializing like we do for Newtonsoft.
    // That most likely needs a custom MessagePack resolver/formatter, because the interning would have to happen
    // as part of the int key -> string mapping operation. Docs on building one:
    // https://github.com/neuecc/MessagePack-CSharp/blob/master/README.md#low-level-api-imessagepackformattert
    // There are a couple of examples online, but this will need to be a separate project.
    //
    // NOTE: custom type resolvers must be listed first (e.g. new ContentNestedDataResolver()),
    // with the standard resolver last as the fallback.
    var compositeResolver = CompositeResolver.Create(contractlessOptions.Resolver);

    // LZ4 block-array compression for the whole payload, hardened for untrusted input.
    var configuredOptions = contractlessOptions.WithResolver(compositeResolver);
    configuredOptions = configuredOptions.WithCompression(MessagePackCompression.Lz4BlockArray);
    _options = configuredOptions.WithSecurity(MessagePackSecurity.UntrustedData);
}
2020-09-25 00:32:11 +10:00
/// <summary>
/// Converts a MessagePack payload to its JSON text representation using this serializer's options.
/// </summary>
/// <param name="bin">The MessagePack-encoded bytes to convert.</param>
/// <returns>The JSON text produced by MessagePack for <paramref name="bin"/>.</returns>
public string ToJson(byte[] bin) => MessagePackSerializer.ConvertToJson(bin, _options);
2021-07-10 14:25:26 +02:00
/// <summary>
/// Deserializes a <see cref="ContentCacheDataModel"/> from MessagePack data and expands its compressed properties.
/// </summary>
/// <param name="content">The content item the data belongs to; used to decide which properties are compressed.</param>
/// <param name="stringData">Base64-encoded MessagePack data; only used when <paramref name="byteData"/> is <c>null</c>.</param>
/// <param name="byteData">Raw MessagePack data; takes precedence over <paramref name="stringData"/>.</param>
/// <param name="published">Whether the data represents the published version; passed to the compression lookup.</param>
/// <returns>
/// The deserialized model, or <c>null</c> when both <paramref name="byteData"/> and
/// <paramref name="stringData"/> are <c>null</c>.
/// </returns>
public ContentCacheDataModel Deserialize(IReadOnlyContentBase content, string stringData, byte[] byteData, bool published)
{
    byte[] payload;

    if (byteData != null)
    {
        payload = byteData;
    }
    else if (stringData != null)
    {
        // NOTE: We don't really support strings but it's possible if manually used (i.e. tests)
        payload = Convert.FromBase64String(stringData);
    }
    else
    {
        // Nothing to deserialize.
        return null;
    }

    var deserialized = MessagePackSerializer.Deserialize<ContentCacheDataModel>(payload, _options);
    Expand(content, deserialized, published);
    return deserialized;
}
2021-07-10 14:25:26 +02:00
/// <summary>
/// Serializes a <see cref="ContentCacheDataModel"/> to MessagePack bytes.
/// </summary>
/// <param name="content">The content item the model belongs to; used to decide which properties are compressed.</param>
/// <param name="model">The model to serialize. Its string property values may be replaced in place by compressed bytes.</param>
/// <param name="published">Whether the model represents the published version; passed to the compression lookup.</param>
/// <returns>A result carrying the serialized bytes and no string data.</returns>
public ContentCacheDataSerializationResult Serialize(IReadOnlyContentBase content, ContentCacheDataModel model, bool published)
{
    // Property compression mutates the model, so it has to run before the model is packed.
    Compress(content, model, published);

    return new ContentCacheDataSerializationResult(
        null,
        MessagePackSerializer.Serialize(model, _options));
}
2020-08-13 23:09:40 +10:00
/// <summary>
/// Used during serialization to compress properties
/// </summary>
/// <param name="content">The content item the model belongs to; passed to the compression lookup.</param>
/// <param name="model">The model whose string property values are replaced in place by LZ4-pickled UTF-8 bytes.</param>
/// <param name="published">Whether the model represents the published version; passed to the compression lookup.</param>
/// <remarks>
/// This will essentially 'double compress' property data. The MsgPack data as a whole will already be compressed
/// but this will go a step further and double compress property data so that it is stored in the nucache file
/// as compressed bytes and therefore will exist in memory as compressed bytes. That is, until the bytes are
/// read/decompressed as a string to be displayed on the front-end. This allows for potentially a significant
/// memory savings but could also affect performance of first rendering pages while decompression occurs.
/// </remarks>
private void Compress(IReadOnlyContentBase content, ContentCacheDataModel model, bool published)
{
    foreach (var propertyAliasToData in model.PropertyData)
    {
        // Only properties flagged for compression are touched.
        if (!_propertyOptions.IsCompressed(content, propertyAliasToData.Key, published))
        {
            continue;
        }

        foreach (var property in propertyAliasToData.Value)
        {
            // The type pattern already rejects null, so a single test replaces the
            // former null check + type check + cast. Non-string values are left untouched.
            if (property.Value is string text)
            {
                property.Value = LZ4Pickler.Pickle(Encoding.UTF8.GetBytes(text), LZ4Level.L00_FAST);
            }
        }
    }
}
2020-08-26 11:43:43 +10:00
/// <summary>
/// Used during deserialization to wrap compressed property values so they can be expanded lazily.
/// </summary>
/// <param name="content">The content item the data belongs to; passed to the compression lookup.</param>
/// <param name="nestedData">The deserialized model whose byte-array property values are wrapped in place.</param>
/// <param name="published">Whether the data represents the published version; passed to the compression lookup.</param>
private void Expand(IReadOnlyContentBase content, ContentCacheDataModel nestedData, bool published)
{
    foreach (var aliasAndValues in nestedData.PropertyData)
    {
        // Properties that are not flagged as compressed keep their deserialized values.
        if (!_propertyOptions.IsCompressed(content, aliasAndValues.Key, published))
        {
            continue;
        }

        var populatedValues = aliasAndValues.Value.Where(candidate => candidate.Value != null);
        foreach (var property in populatedValues)
        {
            if (!(property.Value is byte[] compressedBytes))
            {
                continue;
            }

            // Keep the bytes as-is and hand them to the lazy wrapper.
            property.Value = new LazyCompressedString(compressedBytes);
        }
    }
}
2020-07-03 12:11:05 +10:00
}
}