From 51f6c639bfbdbd9ac22281494e548ba62567886c Mon Sep 17 00:00:00 2001 From: Stephan Date: Thu, 21 Feb 2019 09:35:38 +0100 Subject: [PATCH 1/3] Cleanup Url token encoding --- src/Umbraco.Core/StringExtensions.cs | 109 ++++++++---------- .../Strings/StringExtensionsTests.cs | 15 +++ 2 files changed, 62 insertions(+), 62 deletions(-) diff --git a/src/Umbraco.Core/StringExtensions.cs b/src/Umbraco.Core/StringExtensions.cs index b80149a4ef..4df1105bf7 100644 --- a/src/Umbraco.Core/StringExtensions.cs +++ b/src/Umbraco.Core/StringExtensions.cs @@ -593,13 +593,14 @@ namespace Umbraco.Core /// public static string ToUrlBase64(this string input) { - if (input == null) throw new ArgumentNullException("input"); + if (input == null) throw new ArgumentNullException(nameof(input)); - if (String.IsNullOrEmpty(input)) return String.Empty; + if (string.IsNullOrEmpty(input)) + return string.Empty; + //return Convert.ToBase64String(bytes).Replace(".", "-").Replace("/", "_").Replace("=", ","); var bytes = Encoding.UTF8.GetBytes(input); return UrlTokenEncode(bytes); - //return Convert.ToBase64String(bytes).Replace(".", "-").Replace("/", "_").Replace("=", ","); } /// @@ -609,14 +610,14 @@ namespace Umbraco.Core /// public static string FromUrlBase64(this string input) { - if (input == null) throw new ArgumentNullException("input"); + if (input == null) throw new ArgumentNullException(nameof(input)); //if (input.IsInvalidBase64()) return null; try { //var decodedBytes = Convert.FromBase64String(input.Replace("-", ".").Replace("_", "/").Replace(",", "=")); - byte[] decodedBytes = UrlTokenDecode(input); + var decodedBytes = UrlTokenDecode(input); return decodedBytes != null ? Encoding.UTF8.GetString(decodedBytes) : null; } catch (FormatException) @@ -795,42 +796,40 @@ namespace Umbraco.Core internal static byte[] UrlTokenDecode(string input) { if (input == null) + throw new ArgumentNullException(nameof(input)); + + if (input.Length == 0) + return Array.Empty(); + + // calc array size - must be groups of 4 + var arrayLength = input.Length; + var remain = arrayLength % 4; + if (remain != 0) arrayLength += 4 - remain; + + var inArray = new char[arrayLength]; + for (var i = 0; i < input.Length; i++) { - throw new ArgumentNullException("input"); - } - int length = input.Length; - if (length < 1) - { - return new byte[0]; - } - int num2 = input[length - 1] - '0'; - if ((num2 < 0) || (num2 > 10)) - { - return null; - } - char[] inArray = new char[(length - 1) + num2]; - for (int i = 0; i < (length - 1); i++) - { - char ch = input[i]; + var ch = input[i]; switch (ch) { - case '-': + case '-': // restore '-' as '+' inArray[i] = '+'; break; - case '_': + case '_': // restore '_' as '/' inArray[i] = '/'; break; - default: + default: // keep char unchanged inArray[i] = ch; break; } } - for (int j = length - 1; j < inArray.Length; j++) - { + + // pad with '=' + for (var j = input.Length; j < inArray.Length; j++) inArray[j] = '='; - } + return Convert.FromBase64CharArray(inArray, 0, inArray.Length); } @@ -842,54 +841,40 @@ namespace Umbraco.Core internal static string UrlTokenEncode(byte[] input) { if (input == null) + throw new ArgumentNullException(nameof(input)); + + if (input.Length == 0) + return string.Empty; + + // base-64 digits are A-Z, a-z, 0-9, + and / + // the = char is used for trailing padding + + var str = Convert.ToBase64String(input); + + var pos = str.IndexOf('='); + if (pos < 0) pos = str.Length; + + // replace chars that would cause problems in urls + var chArray = new char[pos]; + for (var i = 0; i < pos; i++) { - throw new ArgumentNullException("input"); - } - if (input.Length < 1) - { - return String.Empty; - } - string str = null; - int index = 0; - char[] chArray = null; - str = Convert.ToBase64String(input); - if (str == null) - { - return null; - } - index = str.Length; - while (index > 0) - { - if (str[index - 1] != '=') - { - break; - } - index--; - } - chArray = new char[index + 1]; - chArray[index] = (char)((0x30 + str.Length) - index); - for (int i = 0; i < index; i++) - { - char ch = str[i]; + var ch = str[i]; switch (ch) { - case '+': + case '+': // replace '+' with '-' chArray[i] = '-'; break; - case '/': + case '/': // replace '/' with '_' chArray[i] = '_'; break; - case '=': - chArray[i] = ch; - break; - - default: + default: // keep char unchanged chArray[i] = ch; break; } } + return new string(chArray); } diff --git a/src/Umbraco.Tests/Strings/StringExtensionsTests.cs b/src/Umbraco.Tests/Strings/StringExtensionsTests.cs index f0db3991b8..4c365d733f 100644 --- a/src/Umbraco.Tests/Strings/StringExtensionsTests.cs +++ b/src/Umbraco.Tests/Strings/StringExtensionsTests.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Linq; +using System.Text; using NUnit.Framework; using Umbraco.Core; using Umbraco.Core.Composing; @@ -202,6 +203,20 @@ namespace Umbraco.Tests.Strings Assert.AreEqual(expected, result); } + [TestCase("hello", "aGVsbG8")] + [TestCase("tad", "dGFk")] + [TestCase("AmqGr+Fd!~ééé", "QW1xR3IrRmQhfsOpw6nDqQ")] + public void UrlTokenEncoding(string value, string expected) + { + var bytes = Encoding.UTF8.GetBytes(value); + Console.WriteLine("base64: " + Convert.ToBase64String(bytes)); + var encoded = StringExtensions.UrlTokenEncode(bytes); + Assert.AreEqual(expected, encoded); + + var backBytes = StringExtensions.UrlTokenDecode(encoded); + var backString = Encoding.UTF8.GetString(backBytes); + Assert.AreEqual(value, backString); + } // FORMAT STRINGS From 5edd61107f8ffd638ce13d8627448211ebf96afb Mon Sep 17 00:00:00 2001 From: Stephan Date: Thu, 21 Feb 2019 11:28:51 +0100 Subject: [PATCH 2/3] Combined Guids media path scheme --- .../CompositionExtensions/FileSystems.cs | 2 +- src/Umbraco.Core/GuidUtils.cs | 67 +++++++++++++++++++ .../CombinedGuidsMediaPathScheme.cs | 8 ++- src/Umbraco.Core/IO/ShadowWrapper.cs | 4 +- .../CoreThings/GuidUtilsTests.cs | 8 +++ src/Umbraco.Tests/IO/FileSystemsTests.cs | 2 +- src/Umbraco.Tests/Testing/UmbracoTestBase.cs | 2 +- 7 files changed, 86 insertions(+), 7 deletions(-) diff --git a/src/Umbraco.Core/Composing/CompositionExtensions/FileSystems.cs b/src/Umbraco.Core/Composing/CompositionExtensions/FileSystems.cs index 76b363f80e..4fbbec0265 100644 --- a/src/Umbraco.Core/Composing/CompositionExtensions/FileSystems.cs +++ b/src/Umbraco.Core/Composing/CompositionExtensions/FileSystems.cs @@ -79,7 +79,7 @@ namespace Umbraco.Core.Composing.CompositionExtensions composition.RegisterUnique(factory => factory.GetInstance()); // register the scheme for media paths - composition.RegisterUnique(); + composition.RegisterUnique(); // register the IMediaFileSystem implementation composition.RegisterFileSystem(); diff --git a/src/Umbraco.Core/GuidUtils.cs b/src/Umbraco.Core/GuidUtils.cs index 3768e1385d..d878cee16b 100644 --- a/src/Umbraco.Core/GuidUtils.cs +++ b/src/Umbraco.Core/GuidUtils.cs @@ -40,5 +40,72 @@ namespace Umbraco.Core public DecomposedGuid(Guid value) : this() => this.Value = value; } + + private static readonly char[] Base32Table = + { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', + 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5' + }; + + /// + /// Converts a Guid into a base-32 string. + /// + /// A Guid. + /// The string length. + /// A base-32 encoded string. + /// + /// A base-32 string representation of a Guid is the shortest, efficient, representation + /// that is case insensitive (base-64 is case sensitive). + /// Length must be 1-26, anything else becomes 26. + /// + public static string ToBase32String(Guid guid, int length = 26) + { + if (length <= 0 || length > 26) + length = 26; + + var bytes = guid.ToByteArray(); // a Guid is 128 bits ie 16 bytes + + // this could be optimized by making it unsafe, + // and fixing the table + bytes + chars (see Convert.ToBase64CharArray) + + // each block of 5 bytes = 5*8 = 40 bits + // becomes 40 bits = 8*5 = 8 byte-32 chars + // a Guid is 3 blocks + 8 bits + + // so it turns into a 3*8+2 = 26 chars string + var chars = new char[length]; + + var i = 0; + var j = 0; + + while (i < 15) + { + if (j == length) break; + chars[j++] = Base32Table[(bytes[i] & 0b1111_1000) >> 3]; + if (j == length) break; + chars[j++] = Base32Table[((bytes[i] & 0b0000_0111) << 2) | ((bytes[i + 1] & 0b1100_0000) >> 6)]; + if (j == length) break; + chars[j++] = Base32Table[(bytes[i + 1] & 0b0011_1110) >> 1]; + if (j == length) break; + chars[j++] = Base32Table[(bytes[i + 1] & 0b0000_0001) | ((bytes[i + 2] & 0b1111_0000) >> 4)]; + if (j == length) break; + chars[j++] = Base32Table[((bytes[i + 2] & 0b0000_1111) << 1) | ((bytes[i + 3] & 0b1000_0000) >> 7)]; + if (j == length) break; + chars[j++] = Base32Table[(bytes[i + 3] & 0b0111_1100) >> 2]; + if (j == length) break; + chars[j++] = Base32Table[((bytes[i + 3] & 0b0000_0011) << 3) | ((bytes[i + 4] & 0b1110_0000) >> 5)]; + if (j == length) break; + chars[j++] = Base32Table[bytes[i + 4] & 0b0001_1111]; + + i += 5; + } + + if (j < length) + chars[j++] = Base32Table[(bytes[i] & 0b1111_1000) >> 3]; + if (j < length) + chars[j] = Base32Table[(bytes[i] & 0b0000_0111) << 2]; + + return new string(chars); + } } } diff --git a/src/Umbraco.Core/IO/MediaPathSchemes/CombinedGuidsMediaPathScheme.cs b/src/Umbraco.Core/IO/MediaPathSchemes/CombinedGuidsMediaPathScheme.cs index 036d7cf3a0..fa5ec12142 100644 --- a/src/Umbraco.Core/IO/MediaPathSchemes/CombinedGuidsMediaPathScheme.cs +++ b/src/Umbraco.Core/IO/MediaPathSchemes/CombinedGuidsMediaPathScheme.cs @@ -11,13 +11,17 @@ namespace Umbraco.Core.IO.MediaPathSchemes /// public class CombinedGuidsMediaPathScheme : IMediaPathScheme { + private const int DirectoryLength = 8; + /// public string GetFilePath(IMediaFileSystem fileSystem, Guid itemGuid, Guid propertyGuid, string filename, string previous = null) { // assumes that cuid and puid keys can be trusted - and that a single property type // for a single content cannot store two different files with the same name - var directory = HexEncoder.Encode(GuidUtils.Combine(itemGuid, propertyGuid).ToByteArray()/*'/', 2, 4*/); // could use ext to fragment path eg 12/e4/f2/... - return Path.Combine(directory, filename).Replace('\\', '/').Substring(0, 9); + + var combinedGuid = GuidUtils.Combine(itemGuid, propertyGuid); + var directory = GuidUtils.ToBase32String(combinedGuid, DirectoryLength); // see also HexEncoder, we may want to fragment path eg 12/e4/f3... + return Path.Combine(directory, filename).Replace('\\', '/'); } /// diff --git a/src/Umbraco.Core/IO/ShadowWrapper.cs b/src/Umbraco.Core/IO/ShadowWrapper.cs index 09ce5f24d0..dacef52a92 100644 --- a/src/Umbraco.Core/IO/ShadowWrapper.cs +++ b/src/Umbraco.Core/IO/ShadowWrapper.cs @@ -25,7 +25,7 @@ namespace Umbraco.Core.IO public static string CreateShadowId() { const int retries = 50; // avoid infinite loop - const int idLength = 6; // 6 chars + const int idLength = 8; // 6 chars // shorten a Guid to idLength chars, and see whether it collides // with an existing directory or not - if it does, try again, and @@ -34,7 +34,7 @@ namespace Umbraco.Core.IO for (var i = 0; i < retries; i++) { - var id = Guid.NewGuid().ToString("N").Substring(0, idLength); + var id = GuidUtils.ToBase32String(Guid.NewGuid(), idLength); var virt = ShadowFsPath + "/" + id; var shadowDir = IOHelper.MapPath(virt); diff --git a/src/Umbraco.Tests/CoreThings/GuidUtilsTests.cs b/src/Umbraco.Tests/CoreThings/GuidUtilsTests.cs index 5ef8cba356..639d85b4ff 100644 --- a/src/Umbraco.Tests/CoreThings/GuidUtilsTests.cs +++ b/src/Umbraco.Tests/CoreThings/GuidUtilsTests.cs @@ -15,6 +15,14 @@ namespace Umbraco.Tests.CoreThings Assert.AreEqual(GuidUtils.Combine(a, b).ToByteArray(), Combine(a, b)); } + [Test] + public void GuidThingTest() + { + var guid = new Guid("f918382f-2bba-453f-a3e2-1f594016ed3b"); + Assert.AreEqual("f22br4n0fm5fli5c", GuidUtils.ToBase32String(guid, 16)); + Assert.AreEqual("f22br4n0f", GuidUtils.ToBase32String(guid, 9)); + } + // Reference implementation taken from original code. private static byte[] Combine(Guid guid1, Guid guid2) { diff --git a/src/Umbraco.Tests/IO/FileSystemsTests.cs b/src/Umbraco.Tests/IO/FileSystemsTests.cs index 8c70de883f..1f0250c066 100644 --- a/src/Umbraco.Tests/IO/FileSystemsTests.cs +++ b/src/Umbraco.Tests/IO/FileSystemsTests.cs @@ -33,7 +33,7 @@ namespace Umbraco.Tests.IO composition.Register(_ => Mock.Of()); composition.Register(_ => Mock.Of()); composition.Register(_ => Mock.Of()); - composition.RegisterUnique(); + composition.RegisterUnique(); composition.Configs.Add(SettingsForTests.GetDefaultGlobalSettings); composition.Configs.Add(SettingsForTests.GetDefaultUmbracoSettings); diff --git a/src/Umbraco.Tests/Testing/UmbracoTestBase.cs b/src/Umbraco.Tests/Testing/UmbracoTestBase.cs index f6f8f220b8..8fb8984fea 100644 --- a/src/Umbraco.Tests/Testing/UmbracoTestBase.cs +++ b/src/Umbraco.Tests/Testing/UmbracoTestBase.cs @@ -243,7 +243,7 @@ namespace Umbraco.Tests.Testing Composition.WithCollectionBuilder(); Composition.RegisterUnique(); - Composition.RegisterUnique(); + Composition.RegisterUnique(); // register empty content apps collection Composition.WithCollectionBuilder(); From 9ecd63eaed1d4eaee35c448fb922b5581731a749 Mon Sep 17 00:00:00 2001 From: Stephan Date: Thu, 21 Feb 2019 12:20:10 +0100 Subject: [PATCH 3/3] Move to UniqueMediaPathScheme --- .../CompositionExtensions/FileSystems.cs | 2 +- .../CombinedGuidsMediaPathScheme.cs | 1 + .../MediaPathSchemes/UniqueMediaPathScheme.cs | 38 +++++++++++++++++++ src/Umbraco.Core/Umbraco.Core.csproj | 1 + src/Umbraco.Tests/IO/FileSystemsTests.cs | 2 +- src/Umbraco.Tests/Testing/UmbracoTestBase.cs | 2 +- 6 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 src/Umbraco.Core/IO/MediaPathSchemes/UniqueMediaPathScheme.cs diff --git a/src/Umbraco.Core/Composing/CompositionExtensions/FileSystems.cs b/src/Umbraco.Core/Composing/CompositionExtensions/FileSystems.cs index 4fbbec0265..078a505be9 100644 --- a/src/Umbraco.Core/Composing/CompositionExtensions/FileSystems.cs +++ b/src/Umbraco.Core/Composing/CompositionExtensions/FileSystems.cs @@ -79,7 +79,7 @@ namespace Umbraco.Core.Composing.CompositionExtensions composition.RegisterUnique(factory => factory.GetInstance()); // register the scheme for media paths - composition.RegisterUnique(); + composition.RegisterUnique(); // register the IMediaFileSystem implementation composition.RegisterFileSystem(); diff --git a/src/Umbraco.Core/IO/MediaPathSchemes/CombinedGuidsMediaPathScheme.cs b/src/Umbraco.Core/IO/MediaPathSchemes/CombinedGuidsMediaPathScheme.cs index fa5ec12142..8a4c0e5dc0 100644 --- a/src/Umbraco.Core/IO/MediaPathSchemes/CombinedGuidsMediaPathScheme.cs +++ b/src/Umbraco.Core/IO/MediaPathSchemes/CombinedGuidsMediaPathScheme.cs @@ -8,6 +8,7 @@ namespace Umbraco.Core.IO.MediaPathSchemes /// /// /// Path is "{combinedGuid}/{filename}" where combinedGuid is a combination of itemGuid and propertyGuid. + /// This scheme is dangerous, as it does not prevent potential collisions. /// public class CombinedGuidsMediaPathScheme : IMediaPathScheme { diff --git a/src/Umbraco.Core/IO/MediaPathSchemes/UniqueMediaPathScheme.cs b/src/Umbraco.Core/IO/MediaPathSchemes/UniqueMediaPathScheme.cs new file mode 100644 index 0000000000..722c92d3de --- /dev/null +++ b/src/Umbraco.Core/IO/MediaPathSchemes/UniqueMediaPathScheme.cs @@ -0,0 +1,38 @@ +using System; +using System.IO; + +namespace Umbraco.Core.IO.MediaPathSchemes +{ + /// + /// Implements a unique directory media path scheme. + /// + /// + /// This scheme provides short paths, yet handle potential collisions. + /// + public class UniqueMediaPathScheme : IMediaPathScheme + { + private const int DirectoryLength = 8; + + /// + public string GetFilePath(IMediaFileSystem fileSystem, Guid itemGuid, Guid propertyGuid, string filename, string previous = null) + { + string directory; + + // no point "combining" guids if all we want is some random guid - just get a new one + // and then, because we don't want collisions, ensure that the directory does not already exist + // (should be quite rare, but eh...) + + do + { + var combinedGuid = Guid.NewGuid(); + directory = GuidUtils.ToBase32String(combinedGuid, DirectoryLength); // see also HexEncoder, we may want to fragment path eg 12/e4/f3... + + } while (fileSystem.DirectoryExists(directory)); + + return Path.Combine(directory, filename).Replace('\\', '/'); + } + + /// + public string GetDeleteDirectory(IMediaFileSystem fileSystem, string filepath) => Path.GetDirectoryName(filepath); + } +} \ No newline at end of file diff --git a/src/Umbraco.Core/Umbraco.Core.csproj b/src/Umbraco.Core/Umbraco.Core.csproj index c7e7db18a7..65b346b895 100755 --- a/src/Umbraco.Core/Umbraco.Core.csproj +++ b/src/Umbraco.Core/Umbraco.Core.csproj @@ -207,6 +207,7 @@ + diff --git a/src/Umbraco.Tests/IO/FileSystemsTests.cs b/src/Umbraco.Tests/IO/FileSystemsTests.cs index 1f0250c066..d4124530ba 100644 --- a/src/Umbraco.Tests/IO/FileSystemsTests.cs +++ b/src/Umbraco.Tests/IO/FileSystemsTests.cs @@ -33,7 +33,7 @@ namespace Umbraco.Tests.IO composition.Register(_ => Mock.Of()); composition.Register(_ => Mock.Of()); composition.Register(_ => Mock.Of()); - composition.RegisterUnique(); + composition.RegisterUnique(); composition.Configs.Add(SettingsForTests.GetDefaultGlobalSettings); composition.Configs.Add(SettingsForTests.GetDefaultUmbracoSettings); diff --git a/src/Umbraco.Tests/Testing/UmbracoTestBase.cs b/src/Umbraco.Tests/Testing/UmbracoTestBase.cs index 8fb8984fea..fedc94d45b 100644 --- a/src/Umbraco.Tests/Testing/UmbracoTestBase.cs +++ b/src/Umbraco.Tests/Testing/UmbracoTestBase.cs @@ -243,7 +243,7 @@ namespace Umbraco.Tests.Testing Composition.WithCollectionBuilder(); Composition.RegisterUnique(); - Composition.RegisterUnique(); + Composition.RegisterUnique(); // register empty content apps collection Composition.WithCollectionBuilder();