diff --git a/src/Umbraco.Core/IO/IOHelper.cs b/src/Umbraco.Core/IO/IOHelper.cs index 8ecb0c15fe..3986198678 100644 --- a/src/Umbraco.Core/IO/IOHelper.cs +++ b/src/Umbraco.Core/IO/IOHelper.cs @@ -263,55 +263,8 @@ namespace Umbraco.Core.IO /// A safe filename without any path specific chars. internal static string SafeFileName(string filePath) { - if (String.IsNullOrEmpty(filePath)) - return String.Empty; - - if (!String.IsNullOrWhiteSpace(filePath)) - { - foreach (var character in Path.GetInvalidFileNameChars()) - { - filePath = filePath.Replace(character, '-'); - } - } - else - { - filePath = String.Empty; - } - - //Break up the file in name and extension before applying the UrlReplaceCharacters - var fileNamePart = filePath.Substring(0, filePath.LastIndexOf('.')); - var ext = filePath.Substring(filePath.LastIndexOf('.')); - - //Because the file usually is downloadable as well we check characters against 'UmbracoSettings.UrlReplaceCharacters' - XmlNode replaceChars = UmbracoSettings.UrlReplaceCharacters; - foreach (XmlNode n in replaceChars.SelectNodes("char")) - { - if (n.Attributes.GetNamedItem("org") != null && n.Attributes.GetNamedItem("org").Value != "") - fileNamePart = fileNamePart.Replace(n.Attributes.GetNamedItem("org").Value, XmlHelper.GetNodeValue(n)); - } - - filePath = string.Concat(fileNamePart, ext); - - // Adapted from: http://stackoverflow.com/a/4827510/5018 - // Combined both Reserved Characters and Character Data - // from http://en.wikipedia.org/wiki/Percent-encoding - var stringBuilder = new StringBuilder(); - - const string reservedCharacters = "!*'();:@&=+$,/?%#[]-~{}\"<>\\^`| "; - - foreach (var character in filePath) - { - if (reservedCharacters.IndexOf(character) == -1) - stringBuilder.Append(character); - else - stringBuilder.Append("-"); - } - - // Remove repeating dashes - // From: http://stackoverflow.com/questions/5111967/regex-to-remove-a-specific-repeated-character - var reducedString = Regex.Replace(stringBuilder.ToString(), "-+", "-"); - - return reducedString; + // use string extensions + return filePath.ToSafeFileName(); } } } diff --git a/src/Umbraco.Core/StringExtensions.cs b/src/Umbraco.Core/StringExtensions.cs index f631fa5614..d93ec84ff0 100644 --- a/src/Umbraco.Core/StringExtensions.cs +++ b/src/Umbraco.Core/StringExtensions.cs @@ -759,6 +759,18 @@ namespace Umbraco.Core return ShortStringHelper.ReplaceMany(text, replacements); } + /// + /// Returns a new string in which all occurences of specified characters are replaced by a specified character. + /// + /// The string to filter. + /// The characters to replace. + /// The replacement character. + /// The filtered string. + public static string ReplaceMany(this string text, char[] chars, char replacement) + { + return ShortStringHelper.ReplaceMany(text, chars, replacement); + } + // FORMAT STRINGS // note: LegacyShortStringHelper will produce a 100% backward-compatible output for ToUrlAlias. @@ -996,5 +1008,28 @@ namespace Umbraco.Core { return ShortStringHelper.SplitPascalCasing(phrase, ' '); } + + /// + /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename, + /// both internally (on disk) and externally (as a url). + /// + /// The text to filter. + /// The safe filename. + public static string ToSafeFileName(this string text) + { + return ShortStringHelper.CleanStringForSafeFileName(text); + } + + /// + /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename, + /// both internally (on disk) and externally (as a url). + /// + /// The text to filter. + /// The culture. + /// The safe filename. + public static string ToSafeFileName(this string text, CultureInfo culture) + { + return ShortStringHelper.CleanStringForSafeFileName(text, culture); + } } } diff --git a/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs b/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs index 74fdaa467e..511fb2fdd1 100644 --- a/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs +++ b/src/Umbraco.Core/Strings/DefaultShortStringHelper.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.IO; using System.Linq; using System.Globalization; using Umbraco.Core.Configuration; @@ -246,6 +247,54 @@ function validateSafeAlias(id, value, immediate, callback) {{ return CleanString(text, CleanStringType.Ascii | CleanStringType.LowerCase | CleanStringType.Url, '-', culture); } + /// + /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename, + /// both internally (on disk) and externally (as a url). + /// + /// The text to filter. + /// The safe filename. + /// Legacy says this was used to "overcome an issue when Umbraco is used in IE in an intranet environment" but that issue is not documented. + public virtual string CleanStringForSafeFileName(string text) + { + if (string.IsNullOrWhiteSpace(text)) + return string.Empty; + + text = text.ReplaceMany(Path.GetInvalidFileNameChars(), '-'); + + var pos = text.LastIndexOf('.'); + var name = pos < 0 ? text : text.Substring(0, pos); + var ext = pos < 0 ? string.Empty : text.Substring(pos + 1); + + name = CleanString(name, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-'); + ext = CleanString(ext, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-'); + + return pos < 0 ? name : (name + "." + ext); + } + + /// + /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename, + /// both internally (on disk) and externally (as a url). + /// + /// The text to filter. + /// The culture. + /// The safe filename. + public virtual string CleanStringForSafeFileName(string text, CultureInfo culture) + { + if (string.IsNullOrWhiteSpace(text)) + return string.Empty; + + text = text.ReplaceMany(Path.GetInvalidFileNameChars(), '-'); + + var pos = text.LastIndexOf('.'); + var name = pos < 0 ? text : text.Substring(0, pos); + var ext = pos < 0 ? string.Empty : text.Substring(pos + 1); + + name = CleanString(name, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-', culture); + ext = CleanString(ext, CleanStringType.Ascii | CleanStringType.Alias | CleanStringType.LowerCase, '-', culture); + + return pos < 0 ? name : (name + "." + ext); + } + #endregion #region CleanString @@ -872,6 +921,26 @@ function validateSafeAlias(id, value, immediate, callback) {{ return replacements.Aggregate(text, (current, kvp) => current.Replace(kvp.Key, kvp.Value)); } + /// + /// Returns a new string in which all occurences of specified characters are replaced by a specified character. + /// + /// The string to filter. + /// The characters to replace. + /// The replacement character. + /// The filtered string. + public virtual string ReplaceMany(string text, char[] chars, char replacement) + { + // be safe + if (text == null) + throw new ArgumentNullException("text"); + if (chars == null) + throw new ArgumentNullException("chars"); + + // see note above + + return chars.Aggregate(text, (current, c) => current.Replace(c, replacement)); + } + #endregion } } diff --git a/src/Umbraco.Core/Strings/IShortStringHelper.cs b/src/Umbraco.Core/Strings/IShortStringHelper.cs index 8d92a5ac72..478e57298d 100644 --- a/src/Umbraco.Core/Strings/IShortStringHelper.cs +++ b/src/Umbraco.Core/Strings/IShortStringHelper.cs @@ -55,6 +55,25 @@ namespace Umbraco.Core.Strings /// The safe url segment. string CleanStringForUrlSegment(string text, CultureInfo culture); + /// + /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename, + /// both internally (on disk) and externally (as a url). + /// + /// The text to filter. + /// The safe filename. + /// Legacy says this was used to "overcome an issue when Umbraco is used in IE in an intranet environment" but that issue is not documented. + string CleanStringForSafeFileName(string text); + + /// + /// Cleans a string, in the context of a specified culture, to produce a string that can safely be used as a filename, + /// both internally (on disk) and externally (as a url). + /// + /// The text to filter. + /// The culture. + /// The safe filename. + /// Legacy says this was used to "overcome an issue when Umbraco is used in IE in an intranet environment" but that issue is not documented. + string CleanStringForSafeFileName(string text, CultureInfo culture); + /// /// Splits a pascal-cased string by inserting a separator in between each term. /// @@ -72,6 +91,15 @@ namespace Umbraco.Core.Strings /// The filtered string. string ReplaceMany(string text, IDictionary replacements); + /// + /// Returns a new string in which all occurences of specified characters are replaced by a specified character. + /// + /// The string to filter. + /// The characters to replace. + /// The replacement character. + /// The filtered string. + string ReplaceMany(string text, char[] chars, char replacement); + /// /// Cleans a string. /// diff --git a/src/Umbraco.Core/Strings/LegacyShortStringHelper.cs b/src/Umbraco.Core/Strings/LegacyShortStringHelper.cs index 8fccca18cd..552c16a5ac 100644 --- a/src/Umbraco.Core/Strings/LegacyShortStringHelper.cs +++ b/src/Umbraco.Core/Strings/LegacyShortStringHelper.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Globalization; +using System.IO; using System.Linq; using System.Text; using System.Text.RegularExpressions; @@ -171,6 +172,83 @@ function isValidAlias(alias) {{ return CleanStringForUrlSegment(text); } + /// + /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename, + /// both internally (on disk) and externally (as a url). + /// + /// The text to filter. + /// The safe filename. + /// Legacy says this was used to "overcome an issue when Umbraco is used in IE in an intranet environment" but that issue is not documented. + public string CleanStringForSafeFileName(string text) + { + var filePath = text; + + // ported from Core.IO.IOHelper.SafeFileName() + + if (String.IsNullOrEmpty(filePath)) + return String.Empty; + + if (!String.IsNullOrWhiteSpace(filePath)) + { + foreach (var character in Path.GetInvalidFileNameChars()) + { + filePath = filePath.Replace(character, '-'); + } + } + else + { + filePath = String.Empty; + } + + //Break up the file in name and extension before applying the UrlReplaceCharacters + var fileNamePart = filePath.Substring(0, filePath.LastIndexOf('.')); + var ext = filePath.Substring(filePath.LastIndexOf('.')); + + //Because the file usually is downloadable as well we check characters against 'UmbracoSettings.UrlReplaceCharacters' + XmlNode replaceChars = UmbracoSettings.UrlReplaceCharacters; + foreach (XmlNode n in replaceChars.SelectNodes("char")) + { + if (n.Attributes.GetNamedItem("org") != null && n.Attributes.GetNamedItem("org").Value != "") + fileNamePart = fileNamePart.Replace(n.Attributes.GetNamedItem("org").Value, XmlHelper.GetNodeValue(n)); + } + + filePath = string.Concat(fileNamePart, ext); + + // Adapted from: http://stackoverflow.com/a/4827510/5018 + // Combined both Reserved Characters and Character Data + // from http://en.wikipedia.org/wiki/Percent-encoding + var stringBuilder = new StringBuilder(); + + const string reservedCharacters = "!*'();:@&=+$,/?%#[]-~{}\"<>\\^`| "; + + foreach (var character in filePath) + { + if (reservedCharacters.IndexOf(character) == -1) + stringBuilder.Append(character); + else + stringBuilder.Append("-"); + } + + // Remove repeating dashes + // From: http://stackoverflow.com/questions/5111967/regex-to-remove-a-specific-repeated-character + var reducedString = Regex.Replace(stringBuilder.ToString(), "-+", "-"); + + return reducedString; + } + + /// + /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a filename, + /// both internally (on disk) and externally (as a url). + /// + /// The text to filter. + /// The culture. + /// The safe filename. + /// Legacy does not support culture contexts. + public string CleanStringForSafeFileName(string text, CultureInfo culture) + { + return CleanStringForSafeFileName(text); + } + #endregion #region CleanString @@ -430,6 +508,26 @@ function isValidAlias(alias) {{ return replacements.Aggregate(text, (current, kvp) => current.Replace(kvp.Key, kvp.Value)); } + /// + /// Returns a new string in which all occurences of specified characters are replaced by a specified character. + /// + /// The string to filter. + /// The characters to replace. + /// The replacement character. + /// The filtered string. + public string ReplaceMany(string text, char[] chars, char replacement) + { + // be safe + if (text == null) + throw new ArgumentNullException("text"); + if (chars == null) + throw new ArgumentNullException("chars"); + + // see note above + + return chars.Aggregate(text, (current, c) => current.Replace(c, replacement)); + } + #endregion } } diff --git a/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs b/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs index 98028fd285..b1be1cd6cb 100644 --- a/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs +++ b/src/Umbraco.Tests/CoreStrings/DefaultShortStringHelperTests.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using NUnit.Framework; using Umbraco.Core; @@ -43,18 +44,18 @@ namespace Umbraco.Tests.CoreStrings static readonly Regex FrenchElisionsRegex = new Regex("\\b(c|d|j|l|m|n|qu|s|t)('|\u8217)", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private string FilterFrenchElisions(string s) + private static string FilterFrenchElisions(string s) { return FrenchElisionsRegex.Replace(s, ""); } - private string StripQuotes(string s) + private static string StripQuotes(string s) { s = s.ReplaceMany(new Dictionary {{"'", ""}, {"\u8217", ""}}); return s; } - private string WhiteQuotes(string s) + private static string WhiteQuotes(string s) { s = s.ReplaceMany(new Dictionary { { "'", " " }, { "\u8217", " " } }); return s; @@ -323,5 +324,52 @@ namespace Umbraco.Tests.CoreStrings var output = _helper.CleanString(input, caseType | CleanStringType.Ascii, separator); Assert.AreEqual(expected, output); } + + [Test] // can't do cases with an IDictionary + public void ReplaceManyWithCharMap() + { + const string input = "télévisiön tzvâr ßup   pof"; + const string expected = "television tzvar ssup pof"; + IDictionary replacements = new Dictionary + { + { "é", "e" }, + { "ö", "o" }, + { "â", "a" }, + { "ß", "ss" }, + { " ", " " }, + }; + var output = _helper.ReplaceMany(input, replacements); + Assert.AreEqual(expected, output); + } + + #region Cases + [TestCase("val$id!ate|this|str'ing", "$!'", '-', "val-id-ate|this|str-ing")] + [TestCase("val$id!ate|this|str'ing", "$!'", '*', "val*id*ate|this|str*ing")] + #endregion + public void ReplaceManyByOneChar(string input, string toReplace, char replacement, string expected) + { + var output = _helper.ReplaceMany(input, toReplace.ToArray(), replacement); + Assert.AreEqual(expected, output); + } + + #region Cases + [TestCase("foo.txt", "foo.txt")] + [TestCase("foo", "foo")] + [TestCase(".txt", ".txt")] + [TestCase("nag*dog/poo:xit.txt", "nag-dog-poo-xit.txt")] + [TestCase("the dog is in the house.txt", "the-dog-is-in-the-house.txt")] + [TestCase("nil.nil.nil.txt", "nil-nil-nil.txt")] + [TestCase("taradabum", "taradabum")] + [TestCase("tara$$da:b/u replacements = new Dictionary + { + { "é", "e" }, + { "ö", "o" }, + { "â", "a" }, + { "ß", "ss" }, + { " ", " " }, + }; + var output = _helper.ReplaceMany(input, replacements); + Assert.AreEqual(expected, output); + } + + #region Cases + [TestCase("val$id!ate|this|str'ing", "$!'", '-', "val-id-ate|this|str-ing")] + [TestCase("val$id!ate|this|str'ing", "$!'", '*', "val*id*ate|this|str*ing")] + #endregion + public void ReplaceManyByOneChar(string input, string toReplace, char replacement, string expected) + { + var output = _helper.ReplaceMany(input, toReplace.ToArray(), replacement); + Assert.AreEqual(expected, output); + } + + #region Cases + [TestCase("foo.txt", "foo.txt")] + [TestCase("foo", "foo", IgnoreReason = "fails when no extension")] + [TestCase(".txt", ".txt")] + [TestCase("nag*dog/poo:xit.txt", "nag-dog-poo-xit.txt")] + [TestCase("the dog is in the house.txt", "the-dog-is-in-the-house.txt")] + [TestCase("nil.nil.nil.txt", "nilnilnil.txt")] // because of chars map + [TestCase("taradabum", "taradabum", IgnoreReason = "fails when no extension")] + [TestCase("tara$$da:b/u replacements) { - return "REPLACE-MANY::" + text; + return "REPLACE-MANY-A::" + text; + } + + public string ReplaceMany(string text, char[] chars, char replacement) + { + return "REPLACE-MANY-B::" + text; } public string CleanString(string text, CleanStringType stringType) diff --git a/src/Umbraco.Tests/CoreStrings/StringExtensionsTests.cs b/src/Umbraco.Tests/CoreStrings/StringExtensionsTests.cs index 896a572077..1474bd98f9 100644 --- a/src/Umbraco.Tests/CoreStrings/StringExtensionsTests.cs +++ b/src/Umbraco.Tests/CoreStrings/StringExtensionsTests.cs @@ -156,6 +156,20 @@ namespace Umbraco.Tests.CoreStrings Assert.AreEqual("URL-SEGMENT-CULTURE::JUST-ANYTHING", output); } + [Test] + public void ToSafeFileName() + { + var output = "JUST-ANYTHING".ToSafeFileName(); + Assert.AreEqual("SAFE-FILE-NAME::JUST-ANYTHING", output); + } + + [Test] + public void ToSafeFileNameWithCulture() + { + var output = "JUST-ANYTHING".ToSafeFileName(CultureInfo.InvariantCulture); + Assert.AreEqual("SAFE-FILE-NAME-CULTURE::JUST-ANYTHING", output); + } + [Test] public void ConvertCase() { @@ -171,10 +185,17 @@ namespace Umbraco.Tests.CoreStrings } [Test] - public void ReplaceMany() + public void ReplaceManyWithCharMap() { var output = "JUST-ANYTHING".ReplaceMany(null); - Assert.AreEqual("REPLACE-MANY::JUST-ANYTHING", output); + Assert.AreEqual("REPLACE-MANY-A::JUST-ANYTHING", output); + } + + [Test] + public void ReplaceManyByOneChar() + { + var output = "JUST-ANYTHING".ReplaceMany(new char[] {}, '*'); + Assert.AreEqual("REPLACE-MANY-B::JUST-ANYTHING", output); } } }