From 3d463ad0c5518795f9494e7e9ef3d5c7461fc3fd Mon Sep 17 00:00:00 2001 From: yv01p Date: Sun, 7 Dec 2025 22:06:40 +0000 Subject: [PATCH] refactor: split StringExtensions into 5 partial class files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the 1,600-line StringExtensions.cs into logical categories: - StringExtensions.Culture.cs - invariant comparison methods - StringExtensions.Manipulation.cs - string modification methods - StringExtensions.Encoding.cs - hashing, base64, guid encoding - StringExtensions.Parsing.cs - parsing and detection methods - StringExtensions.Sanitization.cs - XSS, HTML, file path methods No functional changes. All existing tests pass. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../Extensions/StringExtensions.Culture.cs | 75 + .../Extensions/StringExtensions.Encoding.cs | 485 +++++ .../StringExtensions.Manipulation.cs | 615 +++++++ .../Extensions/StringExtensions.Parsing.cs | 258 +++ .../StringExtensions.Sanitization.cs | 223 +++ .../Extensions/StringExtensions.cs | 1602 ----------------- 6 files changed, 1656 insertions(+), 1602 deletions(-) create mode 100644 src/Umbraco.Core/Extensions/StringExtensions.Culture.cs create mode 100644 src/Umbraco.Core/Extensions/StringExtensions.Encoding.cs create mode 100644 src/Umbraco.Core/Extensions/StringExtensions.Manipulation.cs create mode 100644 src/Umbraco.Core/Extensions/StringExtensions.Parsing.cs create mode 100644 src/Umbraco.Core/Extensions/StringExtensions.Sanitization.cs delete mode 100644 src/Umbraco.Core/Extensions/StringExtensions.cs diff --git a/src/Umbraco.Core/Extensions/StringExtensions.Culture.cs b/src/Umbraco.Core/Extensions/StringExtensions.Culture.cs new file mode 100644 index 0000000000..251297048a --- /dev/null +++ b/src/Umbraco.Core/Extensions/StringExtensions.Culture.cs @@ -0,0 +1,75 @@ +// Copyright (c) Umbraco. +// See LICENSE for more details. + +using System.Globalization; + +namespace Umbraco.Extensions; + +/// +/// Culture and invariant comparison extensions. +/// +public static partial class StringExtensions +{ + /// + /// formats the string with invariant culture + /// + /// The format. + /// The args. + /// + public static string InvariantFormat(this string? format, params object?[] args) => + string.Format(CultureInfo.InvariantCulture, format ?? string.Empty, args); + + /// + /// Converts an integer to an invariant formatted string + /// + /// + /// + public static string ToInvariantString(this int str) => str.ToString(CultureInfo.InvariantCulture); + + public static string ToInvariantString(this long str) => str.ToString(CultureInfo.InvariantCulture); + + /// + /// Compares 2 strings with invariant culture and case ignored + /// + /// The compare. + /// The compare to. + /// + public static bool InvariantEquals(this string? compare, string? compareTo) => + string.Equals(compare, compareTo, StringComparison.InvariantCultureIgnoreCase); + + public static bool InvariantStartsWith(this string compare, string compareTo) => + compare.StartsWith(compareTo, StringComparison.InvariantCultureIgnoreCase); + + public static bool InvariantEndsWith(this string compare, string compareTo) => + compare.EndsWith(compareTo, StringComparison.InvariantCultureIgnoreCase); + + public static bool InvariantContains(this string compare, string compareTo) => + compare.Contains(compareTo, StringComparison.OrdinalIgnoreCase); + + public static bool InvariantContains(this IEnumerable compare, string compareTo) => + compare.Contains(compareTo, StringComparer.InvariantCultureIgnoreCase); + + public static int InvariantIndexOf(this string s, string value) => + s.IndexOf(value, StringComparison.OrdinalIgnoreCase); + + public static int InvariantLastIndexOf(this string s, string value) => + s.LastIndexOf(value, StringComparison.OrdinalIgnoreCase); + + /// + /// Verifies the provided string is a valid culture code and returns it in a consistent casing. + /// + /// Culture code. + /// Culture code in standard casing. + public static string? EnsureCultureCode(this string? culture) + { + if (string.IsNullOrEmpty(culture) || culture == "*") + { + return culture; + } + + // Create as CultureInfo instance from provided name so we can ensure consistent casing of culture code when persisting. + // This will accept mixed case but once created have a `Name` property that is consistently and correctly cased. + // Will throw in an invalid culture code is provided. + return new CultureInfo(culture).Name; + } +} diff --git a/src/Umbraco.Core/Extensions/StringExtensions.Encoding.cs b/src/Umbraco.Core/Extensions/StringExtensions.Encoding.cs new file mode 100644 index 0000000000..413dad3d33 --- /dev/null +++ b/src/Umbraco.Core/Extensions/StringExtensions.Encoding.cs @@ -0,0 +1,485 @@ +// Copyright (c) Umbraco. +// See LICENSE for more details. + +using System.Security.Cryptography; +using System.Text; + +namespace Umbraco.Extensions; + +/// +/// Encoding, hashing, and serialization extensions. +/// +public static partial class StringExtensions +{ + private static readonly char[] ToCSharpHexDigitLower = "0123456789abcdef".ToCharArray(); + private static readonly char[] ToCSharpEscapeChars; + + /// + /// The namespace for URLs (from RFC 4122, Appendix C). + /// See RFC 4122 + /// + internal static readonly Guid UrlNamespace = new("6ba7b811-9dad-11d1-80b4-00c04fd430c8"); + + static StringExtensions() + { + var escapes = new[] { "\aa", "\bb", "\ff", "\nn", "\rr", "\tt", "\vv", "\"\"", "\\\\", "??", "\00" }; + ToCSharpEscapeChars = new char[escapes.Max(e => e[0]) + 1]; + foreach (var escape in escapes) + { + ToCSharpEscapeChars[escape[0]] = escape[1]; + } + } + + /// + /// Generates a hash of a string based on the FIPS compliance setting. + /// + /// Refers to itself + /// The hashed string + public static string GenerateHash(this string str) => str.ToSHA1(); + + /// + /// Generate a hash of a string based on the specified hash algorithm. + /// + /// The hash algorithm implementation to use. + /// The to hash. + /// + /// The hashed string. + /// + public static string GenerateHash(this string str) + where T : HashAlgorithm => str.GenerateHash(typeof(T).FullName); + + /// + /// Converts the string to SHA1 + /// + /// refers to itself + /// The SHA1 hashed string + public static string ToSHA1(this string stringToConvert) => stringToConvert.GenerateHash("SHA1"); + + /// + /// Encodes a string to a safe URL base64 string + /// + /// + /// + public static string ToUrlBase64(this string input) + { + if (input == null) + { + throw new ArgumentNullException(nameof(input)); + } + + if (string.IsNullOrEmpty(input)) + { + return string.Empty; + } + + // return Convert.ToBase64String(bytes).Replace(".", "-").Replace("/", "_").Replace("=", ","); + var bytes = Encoding.UTF8.GetBytes(input); + return UrlTokenEncode(bytes); + } + + /// + /// Decodes a URL safe base64 string back + /// + /// + /// + public static string? FromUrlBase64(this string input) + { + if (input == null) + { + throw new ArgumentNullException(nameof(input)); + } + + // if (input.IsInvalidBase64()) return null; + try + { + // var decodedBytes = Convert.FromBase64String(input.Replace("-", ".").Replace("_", "/").Replace(",", "=")); + var decodedBytes = UrlTokenDecode(input); + return decodedBytes != null ? Encoding.UTF8.GetString(decodedBytes) : null; + } + catch (FormatException) + { + return null; + } + } + + /// + /// Encodes a string so that it is 'safe' for URLs, files, etc.. + /// + /// + /// + public static string UrlTokenEncode(this byte[] input) + { + if (input == null) + { + throw new ArgumentNullException(nameof(input)); + } + + if (input.Length == 0) + { + return string.Empty; + } + + // base-64 digits are A-Z, a-z, 0-9, + and / + // the = char is used for trailing padding + var str = Convert.ToBase64String(input); + + var pos = str.IndexOf('='); + if (pos < 0) + { + pos = str.Length; + } + + // replace chars that would cause problems in URLs + Span chArray = pos <= 1024 ? stackalloc char[pos] : new char[pos]; + for (var i = 0; i < pos; i++) + { + var ch = str[i]; + switch (ch) + { + case '+': // replace '+' with '-' + chArray[i] = '-'; + break; + + case '/': // replace '/' with '_' + chArray[i] = '_'; + break; + + default: // keep char unchanged + chArray[i] = ch; + break; + } + } + + return new string(chArray); + } + + /// + /// Decodes a string that was encoded with UrlTokenEncode + /// + /// + /// + public static byte[] UrlTokenDecode(this string input) + { + if (input == null) + { + throw new ArgumentNullException(nameof(input)); + } + + if (input.Length == 0) + { + return []; + } + + // calc array size - must be groups of 4 + var arrayLength = input.Length; + var remain = arrayLength % 4; + if (remain != 0) + { + arrayLength += 4 - remain; + } + + var inArray = new char[arrayLength]; + for (var i = 0; i < input.Length; i++) + { + var ch = input[i]; + switch (ch) + { + case '-': // restore '-' as '+' + inArray[i] = '+'; + break; + + case '_': // restore '_' as '/' + inArray[i] = '/'; + break; + + default: // keep char unchanged + inArray[i] = ch; + break; + } + } + + // pad with '=' + for (var j = input.Length; j < inArray.Length; j++) + { + inArray[j] = '='; + } + + return Convert.FromBase64CharArray(inArray, 0, inArray.Length); + } + + /// + /// Converts to hex. + /// + /// The input. + /// + public static string ConvertToHex(this string input) + { + if (string.IsNullOrEmpty(input)) + { + return string.Empty; + } + + var sb = new StringBuilder(input.Length); + foreach (var c in input) + { + sb.AppendFormat("{0:x2}", Convert.ToUInt32(c)); + } + + return sb.ToString(); + } + + public static string DecodeFromHex(this string hexValue) + { + var strValue = string.Empty; + while (hexValue.Length > 0) + { + strValue += Convert.ToChar(Convert.ToUInt32(hexValue[..2], 16)).ToString(); + hexValue = hexValue[2..]; + } + + return strValue; + } + + /// + /// Encodes as GUID. + /// + /// The input. + /// + public static Guid EncodeAsGuid(this string input) + { + if (string.IsNullOrWhiteSpace(input)) + { + throw new ArgumentNullException("input"); + } + + var convertToHex = input.ConvertToHex(); + var hexLength = convertToHex.Length < 32 ? convertToHex.Length : 32; + var hex = convertToHex[..hexLength].PadLeft(32, '0'); + return Guid.TryParse(hex, out Guid output) ? output : Guid.Empty; + } + + /// + /// Converts a string to a Guid - WARNING, depending on the string, this may not be unique + /// + /// + /// + public static Guid ToGuid(this string text) => + CreateGuidFromHash( + UrlNamespace, + text, + CryptoConfig.AllowOnlyFipsAlgorithms ? 5 // SHA1 + : 3); // MD5 + + /// + /// Creates a name-based UUID using the algorithm from RFC 4122 §4.3. + /// See + /// GuidUtility.cs + /// for original implementation. + /// + /// The ID of the namespace. + /// The name (within that namespace). + /// + /// The version number of the UUID to create; this value must be either + /// 3 (for MD5 hashing) or 5 (for SHA-1 hashing). + /// + /// A UUID derived from the namespace and name. + /// + /// See + /// Generating a deterministic GUID + /// . + /// + internal static Guid CreateGuidFromHash(Guid namespaceId, string name, int version) + { + if (name == null) + { + throw new ArgumentNullException("name"); + } + + if (version != 3 && version != 5) + { + throw new ArgumentOutOfRangeException("version", "version must be either 3 or 5."); + } + + // convert the name to a sequence of octets (as defined by the standard or conventions of its namespace) (step 3) + // ASSUME: UTF-8 encoding is always appropriate + var nameBytes = Encoding.UTF8.GetBytes(name); + + // convert the namespace UUID to network order (step 3) + var namespaceBytes = namespaceId.ToByteArray(); + SwapByteOrder(namespaceBytes); + + // comput the hash of the name space ID concatenated with the name (step 4) + byte[] hash; + using (HashAlgorithm algorithm = version == 3 ? MD5.Create() : SHA1.Create()) + { + algorithm.TransformBlock(namespaceBytes, 0, namespaceBytes.Length, null, 0); + algorithm.TransformFinalBlock(nameBytes, 0, nameBytes.Length); + hash = algorithm.Hash!; + } + + // most bytes from the hash are copied straight to the bytes of the new GUID (steps 5-7, 9, 11-12) + Span newGuid = hash.AsSpan()[..16]; + + // set the four most significant bits (bits 12 through 15) of the time_hi_and_version field to the appropriate 4-bit version number from Section 4.1.3 (step 8) + newGuid[6] = (byte)((newGuid[6] & 0x0F) | (version << 4)); + + // set the two most significant bits (bits 6 and 7) of the clock_seq_hi_and_reserved to zero and one, respectively (step 10) + newGuid[8] = (byte)((newGuid[8] & 0x3F) | 0x80); + + // convert the resulting UUID to local byte order (step 13) + SwapByteOrder(newGuid); + return new Guid(newGuid); + } + + // Converts a GUID (expressed as a byte array) to/from network order (MSB-first). + internal static void SwapByteOrder(Span guid) + { + SwapBytes(guid, 0, 3); + SwapBytes(guid, 1, 2); + SwapBytes(guid, 4, 5); + SwapBytes(guid, 6, 7); + } + + private static void SwapBytes(Span guid, int left, int right) => (guid[left], guid[right]) = (guid[right], guid[left]); + + /// + /// Converts a literal string into a C# expression. + /// + /// Current instance of the string. + /// The string in a C# format. + public static string ToCSharpString(this string s) + { + if (s == null) + { + return ""; + } + + // http://stackoverflow.com/questions/323640/can-i-convert-a-c-sharp-string-value-to-an-escaped-string-literal + var sb = new StringBuilder(s.Length + 2); + for (var rp = 0; rp < s.Length; rp++) + { + var c = s[rp]; + if (c < ToCSharpEscapeChars.Length && ToCSharpEscapeChars[c] != '\0') + { + sb.Append('\\').Append(ToCSharpEscapeChars[c]); + } + else if (c <= '~' && c >= ' ') + { + sb.Append(c); + } + else + { + sb.Append(@"\x") + .Append(ToCSharpHexDigitLower[(c >> 12) & 0x0F]) + .Append(ToCSharpHexDigitLower[(c >> 8) & 0x0F]) + .Append(ToCSharpHexDigitLower[(c >> 4) & 0x0F]) + .Append(ToCSharpHexDigitLower[c & 0x0F]); + } + } + + return sb.ToString(); + + // requires full trust + /* + using (var writer = new StringWriter()) + using (var provider = CodeDomProvider.CreateProvider("CSharp")) + { + provider.GenerateCodeFromExpression(new CodePrimitiveExpression(s), writer, null); + return writer.ToString().Replace(string.Format("\" +{0}\t\"", Environment.NewLine), ""); + } + */ + } + + public static string EncodeJsString(this string s) + { + var sb = new StringBuilder(); + foreach (var c in s) + { + switch (c) + { + case '\"': + sb.Append("\\\""); + break; + case '\\': + sb.Append("\\\\"); + break; + case '\b': + sb.Append("\\b"); + break; + case '\f': + sb.Append("\\f"); + break; + case '\n': + sb.Append("\\n"); + break; + case '\r': + sb.Append("\\r"); + break; + case '\t': + sb.Append("\\t"); + break; + default: + int i = c; + if (i < 32 || i > 127) + { + sb.AppendFormat("\\u{0:X04}", i); + } + else + { + sb.Append(c); + } + + break; + } + } + + return sb.ToString(); + } + + /// + /// Generate a hash of a string based on the hashType passed in + /// + /// Refers to itself + /// + /// String with the hash type. See remarks section of the CryptoConfig Class in MSDN docs for a + /// list of possible values. + /// + /// The hashed string + private static string GenerateHash(this string str, string? hashType) + { + HashAlgorithm? hasher = null; + + // create an instance of the correct hashing provider based on the type passed in + if (hashType is not null) + { + hasher = HashAlgorithm.Create(hashType); + } + + if (hasher == null) + { + throw new InvalidOperationException("No hashing type found by name " + hashType); + } + + using (hasher) + { + // convert our string into byte array + var byteArray = Encoding.UTF8.GetBytes(str); + + // get the hashed values created by our selected provider + var hashedByteArray = hasher.ComputeHash(byteArray); + + // create a StringBuilder object + var stringBuilder = new StringBuilder(); + + // loop to each byte + foreach (var b in hashedByteArray) + { + // append it to our StringBuilder + stringBuilder.Append(b.ToString("x2")); + } + + // return the hashed value + return stringBuilder.ToString(); + } + } +} diff --git a/src/Umbraco.Core/Extensions/StringExtensions.Manipulation.cs b/src/Umbraco.Core/Extensions/StringExtensions.Manipulation.cs new file mode 100644 index 0000000000..1401c3085f --- /dev/null +++ b/src/Umbraco.Core/Extensions/StringExtensions.Manipulation.cs @@ -0,0 +1,615 @@ +// Copyright (c) Umbraco. +// See LICENSE for more details. + +using System.Globalization; +using System.Text; +using System.Text.RegularExpressions; +using Umbraco.Cms.Core; +using Umbraco.Cms.Core.Strings; + +namespace Umbraco.Extensions; + +/// +/// String manipulation and modification extensions. +/// +public static partial class StringExtensions +{ + internal static readonly Lazy Whitespace = new(() => new Regex(@"\s+", RegexOptions.Compiled)); + + /// + /// Trims the specified value from a string; accepts a string input whereas the in-built implementation only accepts + /// char or char[]. + /// + /// The value. + /// For removing. + /// + public static string Trim(this string value, string forRemoving) + { + if (string.IsNullOrEmpty(value)) + { + return value; + } + + return value.TrimEnd(forRemoving).TrimStart(forRemoving); + } + + public static string TrimEnd(this string value, string forRemoving) + { + if (string.IsNullOrEmpty(value)) + { + return value; + } + + if (string.IsNullOrEmpty(forRemoving)) + { + return value; + } + + while (value.EndsWith(forRemoving, StringComparison.InvariantCultureIgnoreCase)) + { + value = value.Remove(value.LastIndexOf(forRemoving, StringComparison.InvariantCultureIgnoreCase)); + } + + return value; + } + + public static string TrimStart(this string value, string forRemoving) + { + if (string.IsNullOrEmpty(value)) + { + return value; + } + + if (string.IsNullOrEmpty(forRemoving)) + { + return value; + } + + while (value.StartsWith(forRemoving, StringComparison.InvariantCultureIgnoreCase)) + { + value = value[forRemoving.Length..]; + } + + return value; + } + + public static string EnsureStartsWith(this string input, string toStartWith) + { + if (input.StartsWith(toStartWith)) + { + return input; + } + + return toStartWith + input.TrimStart(toStartWith); + } + + public static string EnsureStartsWith(this string input, char value) => + input.StartsWith(value.ToString(CultureInfo.InvariantCulture)) ? input : value + input; + + public static string EnsureEndsWith(this string input, char value) => + input.EndsWith(value.ToString(CultureInfo.InvariantCulture)) ? input : input + value; + + public static string EnsureEndsWith(this string input, string toEndWith) => + input.EndsWith(toEndWith.ToString(CultureInfo.InvariantCulture)) ? input : input + toEndWith; + + /// + /// Returns a copy of the string with the first character converted to uppercase. + /// + /// The string. + /// The converted string. + public static string ToFirstUpper(this string input) => + string.IsNullOrWhiteSpace(input) + ? input + : input[..1].ToUpper() + input[1..]; + + /// + /// Returns a copy of the string with the first character converted to lowercase. + /// + /// The string. + /// The converted string. + public static string ToFirstLower(this string input) => + string.IsNullOrWhiteSpace(input) + ? input + : input[..1].ToLower() + input[1..]; + + /// + /// Returns a copy of the string with the first character converted to uppercase using the casing rules of the + /// specified culture. + /// + /// The string. + /// The culture. + /// The converted string. + public static string ToFirstUpper(this string input, CultureInfo culture) => + string.IsNullOrWhiteSpace(input) + ? input + : input[..1].ToUpper(culture) + input[1..]; + + /// + /// Returns a copy of the string with the first character converted to lowercase using the casing rules of the + /// specified culture. + /// + /// The string. + /// The culture. + /// The converted string. + public static string ToFirstLower(this string input, CultureInfo culture) => + string.IsNullOrWhiteSpace(input) + ? input + : input[..1].ToLower(culture) + input[1..]; + + /// + /// Returns a copy of the string with the first character converted to uppercase using the casing rules of the + /// invariant culture. + /// + /// The string. + /// The converted string. + public static string ToFirstUpperInvariant(this string input) => + string.IsNullOrWhiteSpace(input) + ? input + : input[..1].ToUpperInvariant() + input[1..]; + + /// + /// Returns a copy of the string with the first character converted to lowercase using the casing rules of the + /// invariant culture. + /// + /// The string. + /// The converted string. + public static string ToFirstLowerInvariant(this string input) => + string.IsNullOrWhiteSpace(input) + ? input + : input[..1].ToLowerInvariant() + input[1..]; + + /// + /// Returns a new string in which all occurrences of specified strings are replaced by other specified strings. + /// + /// The string to filter. + /// The replacements definition. + /// The filtered string. + public static string ReplaceMany(this string text, IDictionary replacements) + { + if (text == null) + { + throw new ArgumentNullException(nameof(text)); + } + + if (replacements == null) + { + throw new ArgumentNullException(nameof(replacements)); + } + + foreach (KeyValuePair item in replacements) + { + text = text.Replace(item.Key, item.Value); + } + + return text; + } + + /// + /// Returns a new string in which all occurrences of specified characters are replaced by a specified character. + /// + /// The string to filter. + /// The characters to replace. + /// The replacement character. + /// The filtered string. + public static string ReplaceMany(this string text, char[] chars, char replacement) + { + if (text == null) + { + throw new ArgumentNullException(nameof(text)); + } + + if (chars == null) + { + throw new ArgumentNullException(nameof(chars)); + } + + for (var i = 0; i < chars.Length; i++) + { + text = text.Replace(chars[i], replacement); + } + + return text; + } + + /// + /// Returns a new string in which only the first occurrence of a specified string is replaced by a specified + /// replacement string. + /// + /// The string to filter. + /// The string to replace. + /// The replacement string. + /// The filtered string. + public static string ReplaceFirst(this string text, string search, string replace) + { + if (text == null) + { + throw new ArgumentNullException(nameof(text)); + } + + ReadOnlySpan spanText = text.AsSpan(); + var pos = spanText.IndexOf(search, StringComparison.InvariantCulture); + + if (pos < 0) + { + return text; + } + + return string.Concat(spanText[..pos], replace.AsSpan(), spanText[(pos + search.Length)..]); + } + + /// + /// An extension method that returns a new string in which all occurrences of a + /// specified string in the current instance are replaced with another specified string. + /// StringComparison specifies the type of search to use for the specified string. + /// + /// Current instance of the string + /// Specified string to replace + /// Specified string to inject + /// String Comparison object to specify search type + /// Updated string + public static string Replace(this string source, string oldString, string newString, StringComparison stringComparison) + { + // This initialization ensures the first check starts at index zero of the source. On successive checks for + // a match, the source is skipped to immediately after the last replaced occurrence for efficiency + // and to avoid infinite loops when oldString and newString compare equal. + var index = -1 * newString.Length; + + // Determine if there are any matches left in source, starting from just after the result of replacing the last match. + while ((index = source.IndexOf(oldString, index + newString.Length, stringComparison)) >= 0) + { + // Remove the old text. + source = source.Remove(index, oldString.Length); + + // Add the replacement text. + source = source.Insert(index, newString); + } + + return source; + } + + public static string ReplaceNonAlphanumericChars(this string input, string replacement) + { + // any character that is not alphanumeric, convert to a hyphen + var mName = input; + foreach (var c in mName.ToCharArray().Where(c => !char.IsLetterOrDigit(c))) + { + mName = mName.Replace(c.ToString(CultureInfo.InvariantCulture), replacement); + } + + return mName; + } + + public static string ReplaceNonAlphanumericChars(this string input, char replacement) + { + var chars = input.ToCharArray(); + for (var i = 0; i < chars.Length; i++) + { + if (!char.IsLetterOrDigit(chars[i])) + { + chars[i] = replacement; + } + } + + return new string(chars); + } + + public static string ExceptChars(this string str, HashSet toExclude) + { + var sb = new StringBuilder(str.Length); + foreach (var c in str.Where(c => toExclude.Contains(c) == false)) + { + sb.Append(c); + } + + return sb.ToString(); + } + + /// + /// Truncates the specified text string. + /// + /// The text. + /// Length of the max. + /// The suffix. + /// + public static string Truncate(this string text, int maxLength, string suffix = "...") + { + // replaces the truncated string to a ... + var truncatedString = text; + + if (maxLength <= 0) + { + return truncatedString; + } + + var strLength = maxLength - suffix.Length; + + if (strLength <= 0) + { + return truncatedString; + } + + if (text == null || text.Length <= maxLength) + { + return truncatedString; + } + + truncatedString = text[..strLength]; + truncatedString = truncatedString.TrimEnd(); + truncatedString += suffix; + + return truncatedString; + } + + /// + /// Removes new lines and tabs + /// + /// + /// + public static string StripWhitespace(this string txt) => Regex.Replace(txt, @"\s", string.Empty); + + /// + /// Strips carrage returns and line feeds from the specified text. + /// + /// The input. + /// + public static string StripNewLines(this string input) => input.Replace("\r", string.Empty).Replace("\n", string.Empty); + + /// + /// Converts to single line by replacing line breaks with spaces. + /// + public static string ToSingleLine(this string text) + { + if (string.IsNullOrEmpty(text)) + { + return text; + } + + text = text.Replace("\r\n", " "); // remove CRLF + text = text.Replace("\r", " "); // remove CR + text = text.Replace("\n", " "); // remove LF + return text; + } + + // this is from SqlMetal and just makes it a bit of fun to allow pluralization + public static string MakePluralName(this string name) + { + if (name.EndsWith("x", StringComparison.OrdinalIgnoreCase) || + name.EndsWith("ch", StringComparison.OrdinalIgnoreCase) || + name.EndsWith("s", StringComparison.OrdinalIgnoreCase) || + name.EndsWith("sh", StringComparison.OrdinalIgnoreCase)) + { + name += "es"; + return name; + } + + if (name.EndsWith("y", StringComparison.OrdinalIgnoreCase) && name.Length > 1 && + !IsVowel(name[^2])) + { + name = name.Remove(name.Length - 1, 1); + name += "ies"; + return name; + } + + if (!name.EndsWith("s", StringComparison.OrdinalIgnoreCase)) + { + name += "s"; + } + + return name; + } + + public static bool IsVowel(this char c) + { + switch (c) + { + case 'O': + case 'U': + case 'Y': + case 'A': + case 'E': + case 'I': + case 'o': + case 'u': + case 'y': + case 'a': + case 'e': + case 'i': + return true; + } + + return false; + } + + public static bool IsLowerCase(this char ch) => ch.ToString(CultureInfo.InvariantCulture) == + ch.ToString(CultureInfo.InvariantCulture).ToLowerInvariant(); + + public static bool IsUpperCase(this char ch) => ch.ToString(CultureInfo.InvariantCulture) == + ch.ToString(CultureInfo.InvariantCulture).ToUpperInvariant(); + + // FORMAT STRINGS + + /// + /// Cleans a string to produce a string that can safely be used in an alias. + /// + /// The text to filter. + /// The short string helper. + /// The safe alias. + public static string ToSafeAlias(this string alias, IShortStringHelper? shortStringHelper) => + shortStringHelper?.CleanStringForSafeAlias(alias) ?? string.Empty; + + /// + /// Cleans a string to produce a string that can safely be used in an alias. + /// + /// The text to filter. + /// A value indicating that we want to camel-case the alias. + /// The short string helper. + /// The safe alias. + public static string ToSafeAlias(this string alias, IShortStringHelper shortStringHelper, bool camel) + { + var a = shortStringHelper.CleanStringForSafeAlias(alias); + if (string.IsNullOrWhiteSpace(a) || camel == false) + { + return a; + } + + return char.ToLowerInvariant(a[0]) + a[1..]; + } + + /// + /// Cleans a string, in the context of a specified culture, to produce a string that can safely be used in an alias. + /// + /// The text to filter. + /// The culture. + /// The short string helper. + /// The safe alias. + public static string ToSafeAlias(this string alias, IShortStringHelper shortStringHelper, string culture) => + shortStringHelper.CleanStringForSafeAlias(alias, culture); + + // the new methods to get a url segment + + /// + /// Cleans a string to produce a string that can safely be used in an url segment. + /// + /// The text to filter. + /// The short string helper. + /// The safe url segment. + public static string ToUrlSegment(this string text, IShortStringHelper shortStringHelper) + { + if (text == null) + { + throw new ArgumentNullException(nameof(text)); + } + + if (string.IsNullOrWhiteSpace(text)) + { + throw new ArgumentException( + "Value can't be empty or consist only of white-space characters.", + nameof(text)); + } + + return shortStringHelper.CleanStringForUrlSegment(text); + } + + /// + /// Cleans a string, in the context of a specified culture, to produce a string that can safely be used in an url + /// segment. + /// + /// The text to filter. + /// The short string helper. + /// The culture. + /// The safe url segment. + public static string ToUrlSegment(this string text, IShortStringHelper shortStringHelper, string? culture) + { + if (text == null) + { + throw new ArgumentNullException(nameof(text)); + } + + if (string.IsNullOrWhiteSpace(text)) + { + throw new ArgumentException( + "Value can't be empty or consist only of white-space characters.", + nameof(text)); + } + + return shortStringHelper.CleanStringForUrlSegment(text, culture); + } + + /// + /// Cleans a string. + /// + /// The text to clean. + /// The short string helper. + /// + /// A flag indicating the target casing and encoding of the string. By default, + /// strings are cleaned up to camelCase and Ascii. + /// + /// The clean string. + /// The string is cleaned in the context of the ICurrent.ShortStringHelper default culture. + public static string ToCleanString(this string text, IShortStringHelper shortStringHelper, CleanStringType stringType) => shortStringHelper.CleanString(text, stringType); + + /// + /// Cleans a string, using a specified separator. + /// + /// The text to clean. + /// The short string helper. + /// + /// A flag indicating the target casing and encoding of the string. By default, + /// strings are cleaned up to camelCase and Ascii. + /// + /// The separator. + /// The clean string. + /// The string is cleaned in the context of the ICurrent.ShortStringHelper default culture. + public static string ToCleanString(this string text, IShortStringHelper shortStringHelper, CleanStringType stringType, char separator) => shortStringHelper.CleanString(text, stringType, separator); + + /// + /// Cleans a string in the context of a specified culture. + /// + /// The text to clean. + /// The short string helper. + /// + /// A flag indicating the target casing and encoding of the string. By default, + /// strings are cleaned up to camelCase and Ascii. + /// + /// The culture. + /// The clean string. + public static string ToCleanString(this string text, IShortStringHelper shortStringHelper, CleanStringType stringType, string culture) => shortStringHelper.CleanString(text, stringType, culture); + + /// + /// Cleans a string in the context of a specified culture, using a specified separator. + /// + /// The text to clean. + /// The short string helper. + /// + /// A flag indicating the target casing and encoding of the string. By default, + /// strings are cleaned up to camelCase and Ascii. + /// + /// The separator. + /// The culture. + /// The clean string. + public static string ToCleanString(this string text, IShortStringHelper shortStringHelper, CleanStringType stringType, char separator, string culture) => + shortStringHelper.CleanString(text, stringType, separator, culture); + + // note: LegacyCurrent.ShortStringHelper will produce 100% backward-compatible output for SplitPascalCasing. + // other helpers may not. DefaultCurrent.ShortStringHelper produces better, but non-compatible, results. + + /// + /// Splits a Pascal cased string into a phrase separated by spaces. + /// + /// The text to split. + /// + /// The split text. + public static string SplitPascalCasing(this string phrase, IShortStringHelper shortStringHelper) => + shortStringHelper.SplitPascalCasing(phrase, ' '); + + /// + /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a + /// filename, + /// both internally (on disk) and externally (as a url). + /// + /// The text to filter. + /// + /// The safe filename. + public static string ToSafeFileName(this string text, IShortStringHelper shortStringHelper) => + shortStringHelper.CleanStringForSafeFileName(text); + + // NOTE: Not sure what this actually does but is used a few places, need to figure it out and then move to StringExtensions and obsolete. + // it basically is yet another version of SplitPascalCasing + // plugging string extensions here to be 99% compatible + // the only diff. is with numbers, Number6Is was "Number6 Is", and the new string helper does it too, + // but the legacy one does "Number6Is"... assuming it is not a big deal. + internal static string SpaceCamelCasing(this string phrase, IShortStringHelper shortStringHelper) => + phrase.Length < 2 ? phrase : phrase.SplitPascalCasing(shortStringHelper).ToFirstUpperInvariant(); + + /// + /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a + /// filename, + /// both internally (on disk) and externally (as a url). + /// + /// The text to filter. + /// + /// The culture. + /// The safe filename. + public static string ToSafeFileName(this string text, IShortStringHelper shortStringHelper, string culture) => + shortStringHelper.CleanStringForSafeFileName(text, culture); +} diff --git a/src/Umbraco.Core/Extensions/StringExtensions.Parsing.cs b/src/Umbraco.Core/Extensions/StringExtensions.Parsing.cs new file mode 100644 index 0000000000..feea76ce85 --- /dev/null +++ b/src/Umbraco.Core/Extensions/StringExtensions.Parsing.cs @@ -0,0 +1,258 @@ +// Copyright (c) Umbraco. +// See LICENSE for more details. + +using System.ComponentModel; +using System.Diagnostics.CodeAnalysis; +using System.Globalization; +using System.Text; +using System.Text.RegularExpressions; +using Umbraco.Cms.Core; + +namespace Umbraco.Extensions; + +/// +/// Parsing, detection, and splitting extensions. +/// +public static partial class StringExtensions +{ + internal static readonly string[] JsonEmpties = { "[]", "{}" }; + private const char DefaultEscapedStringEscapeChar = '\\'; + + /// + /// Indicates whether a specified string is null, empty, or + /// consists only of white-space characters. + /// + /// The value to check. + /// + /// Returns if the value is null, + /// empty, or consists only of white-space characters, otherwise + /// returns . + /// + public static bool IsNullOrWhiteSpace([NotNullWhen(false)] this string? value) => string.IsNullOrWhiteSpace(value); + + [return: NotNullIfNotNull("defaultValue")] + public static string? IfNullOrWhiteSpace(this string? str, string? defaultValue) => + str.IsNullOrWhiteSpace() ? defaultValue : str; + + [return: NotNullIfNotNull(nameof(alternative))] + public static string? OrIfNullOrWhiteSpace(this string? input, string? alternative) => + !string.IsNullOrWhiteSpace(input) + ? input + : alternative; + + /// + /// Turns an null-or-whitespace string into a null string. + /// + public static string? NullOrWhiteSpaceAsNull(this string? text) + => string.IsNullOrWhiteSpace(text) ? null : text; + + /// + /// This tries to detect a json string, this is not a fail safe way but it is quicker than doing + /// a try/catch when deserializing when it is not json. + /// + /// + /// + public static bool DetectIsJson(this string input) + { + if (input.IsNullOrWhiteSpace()) + { + return false; + } + + input = input.Trim(); + return (input[0] is '[' && input[^1] is ']') || (input[0] is '{' && input[^1] is '}'); + } + + public static bool DetectIsEmptyJson(this string input) => + JsonEmpties.Contains(Whitespace.Value.Replace(input, string.Empty)); + + /// + /// Tries to parse a string into the supplied type by finding and using the Type's "Parse" method + /// + /// + /// + /// + public static T? ParseInto(this string val) => (T?)val.ParseInto(typeof(T)); + + /// + /// Tries to parse a string into the supplied type by finding and using the Type's "Parse" method + /// + /// + /// + /// + public static object? ParseInto(this string val, Type type) + { + if (string.IsNullOrEmpty(val) == false) + { + TypeConverter tc = TypeDescriptor.GetConverter(type); + return tc.ConvertFrom(val); + } + + return val; + } + + /// enum try parse. + /// The str type. + /// The ignore case. + /// The result. + /// The type + /// The enum try parse. + [SuppressMessage("Microsoft.Design", "CA1031:DoNotCatchGeneralExceptionTypes", Justification = "By Design")] + [SuppressMessage("Microsoft.Design", "CA1021:AvoidOutParameters", MessageId = "2#", Justification = "By Design")] + public static bool EnumTryParse(this string strType, bool ignoreCase, out T? result) + { + try + { + result = (T)Enum.Parse(typeof(T), strType, ignoreCase); + return true; + } + catch + { + result = default; + return false; + } + } + + /// + /// Parse string to Enum + /// + /// The enum type + /// The string to parse + /// The ignore case + /// The parsed enum + [SuppressMessage("Microsoft.Design", "CA1031:DoNotCatchGeneralExceptionTypes", Justification = "By Design")] + [SuppressMessage("Microsoft.Design", "CA1021:AvoidOutParameters", MessageId = "2#", Justification = "By Design")] + public static T EnumParse(this string strType, bool ignoreCase) => (T)Enum.Parse(typeof(T), strType, ignoreCase); + + /// The to delimited list. + /// The list. + /// The delimiter. + /// the list + [SuppressMessage("Microsoft.Design", "CA1026:DefaultParametersShouldNotBeUsed", Justification = "By design")] + public static IList ToDelimitedList(this string list, string delimiter = ",") + { + var delimiters = new[] { delimiter }; + return !list.IsNullOrWhiteSpace() + ? list.Split(delimiters, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries) + .ToList() + : new List(); + } + + /// + /// Splits a string with an escape character that allows for the split character to exist in a string + /// + /// The string to split + /// The character to split on + /// The character which can be used to escape the character to split on + /// The string split into substrings delimited by the split character + public static IEnumerable EscapedSplit(this string value, char splitChar, char escapeChar = DefaultEscapedStringEscapeChar) + { + if (value == null) + { + yield break; + } + + var sb = new StringBuilder(value.Length); + var escaped = false; + + foreach (var chr in value.ToCharArray()) + { + if (escaped) + { + escaped = false; + sb.Append(chr); + } + else if (chr == splitChar) + { + yield return sb.ToString(); + sb.Clear(); + } + else if (chr == escapeChar) + { + escaped = true; + } + else + { + sb.Append(chr); + } + } + + yield return sb.ToString(); + } + + /// + /// Checks whether a string "haystack" contains within it any of the strings in the "needles" collection and returns + /// true if it does or false if it doesn't + /// + /// The string to check + /// The collection of strings to check are contained within the first string + /// + /// The type of comparison to perform - defaults to + /// + /// True if any of the needles are contained with haystack; otherwise returns false + /// Added fix to ensure the comparison is used - see http://issues.umbraco.org/issue/U4-11313 + public static bool ContainsAny(this string haystack, IEnumerable needles, StringComparison comparison = StringComparison.CurrentCulture) + { + if (haystack == null) + { + throw new ArgumentNullException("haystack"); + } + + if (string.IsNullOrEmpty(haystack) || needles == null || !needles.Any()) + { + return false; + } + + return needles.Any(value => haystack.IndexOf(value, comparison) >= 0); + } + + public static bool CsvContains(this string csv, string value) + { + if (string.IsNullOrEmpty(csv)) + { + return false; + } + + var idCheckList = csv.Split(Constants.CharArrays.Comma, StringSplitOptions.RemoveEmptyEntries); + return idCheckList.Contains(value); + } + + // having benchmarked various solutions (incl. for/foreach, split and LINQ based ones), + // this is by far the fastest way to find string needles in a string haystack + public static int CountOccurrences(this string haystack, string needle) + => haystack.Length - haystack.Replace(needle, string.Empty).Length; + + /// + /// Convert a path to node ids in the order from right to left (deepest to shallowest). + /// + /// The path string expected as a comma delimited collection of integers. + /// An array of integers matching the provided path. + public static int[] GetIdsFromPathReversed(this string path) + { + ReadOnlySpan pathSpan = path.AsSpan(); + + // Using the explicit enumerator (while/MoveNext) over the SpanSplitEnumerator in a foreach loop to avoid any compiler + // boxing of the ref struct enumerator. + // This prevents potential InvalidProgramException across compilers/JITs ("Cannot create boxed ByRef-like values."). + MemoryExtensions.SpanSplitEnumerator pathSegmentsEnumerator = pathSpan.Split(Constants.CharArrays.Comma); + + List nodeIds = []; + while (pathSegmentsEnumerator.MoveNext()) + { + Range rangeOfPathSegment = pathSegmentsEnumerator.Current; + if (int.TryParse(pathSpan[rangeOfPathSegment], NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment)) + { + nodeIds.Add(pathSegment); + } + } + + var result = new int[nodeIds.Count]; + var resultIndex = 0; + for (int i = nodeIds.Count - 1; i >= 0; i--) + { + result[resultIndex++] = nodeIds[i]; + } + + return result; + } +} diff --git a/src/Umbraco.Core/Extensions/StringExtensions.Sanitization.cs b/src/Umbraco.Core/Extensions/StringExtensions.Sanitization.cs new file mode 100644 index 0000000000..fe31796681 --- /dev/null +++ b/src/Umbraco.Core/Extensions/StringExtensions.Sanitization.cs @@ -0,0 +1,223 @@ +// Copyright (c) Umbraco. +// See LICENSE for more details. + +using System.ComponentModel.DataAnnotations; +using System.Globalization; +using System.Text.RegularExpressions; +using Umbraco.Cms.Core; + +namespace Umbraco.Extensions; + +/// +/// XSS, HTML, file path, and sanitization extensions. +/// +public static partial class StringExtensions +{ + private static readonly char[] CleanForXssChars = "*?(){}[];:%<>/\\|&'\"".ToCharArray(); + + // From: http://stackoverflow.com/a/961504/5018 + // filters control characters but allows only properly-formed surrogate sequences + private static readonly Lazy InvalidXmlChars = new(() => + new Regex( + @"(? + /// Cleans string to aid in preventing xss attacks. + /// + /// + /// + /// + public static string CleanForXss(this string input, params char[] ignoreFromClean) + { + // remove any HTML + input = input.StripHtml(); + + // strip out any potential chars involved with XSS + return input.ExceptChars(new HashSet(CleanForXssChars.Except(ignoreFromClean))); + } + + /// + /// Strips all HTML from a string. + /// + /// The text. + /// Returns the string without any HTML tags. + public static string StripHtml(this string text) + { + const string pattern = @"<(.|\n)*?>"; + return Regex.Replace(text, pattern, string.Empty, RegexOptions.Compiled); + } + + /// + /// An extension method that returns a new string in which all occurrences of an + /// unicode characters that are invalid in XML files are replaced with an empty string. + /// + /// Current instance of the string + /// Updated string + /// + /// removes any unusual unicode characters that can't be encoded into XML + /// + public static string ToValidXmlString(this string text) => + string.IsNullOrEmpty(text) ? text : InvalidXmlChars.Value.Replace(text, string.Empty); + + public static string EscapeRegexSpecialCharacters(this string text) + { + var regexSpecialCharacters = new Dictionary + { + { ".", @"\." }, + { "(", @"\(" }, + { ")", @"\)" }, + { "]", @"\]" }, + { "[", @"\[" }, + { "{", @"\{" }, + { "}", @"\}" }, + { "?", @"\?" }, + { "!", @"\!" }, + { "$", @"\$" }, + { "^", @"\^" }, + { "+", @"\+" }, + { "*", @"\*" }, + { "|", @"\|" }, + { "<", @"\<" }, + { ">", @"\>" }, + }; + return ReplaceMany(text, regexSpecialCharacters); + } + + public static string StripFileExtension(this string fileName) + { + // filenames cannot contain line breaks + if (fileName.Contains('\n') || fileName.Contains('\r')) + { + return fileName; + } + + ReadOnlySpan spanFileName = fileName.AsSpan(); + var lastIndex = spanFileName.LastIndexOf('.'); + if (lastIndex > 0) + { + ReadOnlySpan ext = spanFileName[lastIndex..]; + + // file extensions cannot contain whitespace + if (ext.Contains(' ')) + { + return fileName; + } + + return new string(spanFileName[..lastIndex]); + } + + return fileName; + } + + /// + /// Determines the extension of the path or URL + /// + /// + /// Extension of the file + public static string GetFileExtension(this string file) + { + // Find any characters between the last . and the start of a query string or the end of the string + const string pattern = @"(?\.[^\.\?]+)(\?.*|$)"; + Match match = Regex.Match(file, pattern); + return match.Success + ? match.Groups["extension"].Value + : string.Empty; + } + + /// + /// Ensures that the folder path ends with a DirectorySeparatorChar + /// + /// + /// + public static string NormaliseDirectoryPath(this string currentFolder) + { + currentFolder = currentFolder + .IfNull(x => string.Empty) + .TrimEnd(Path.DirectorySeparatorChar) + Path.DirectorySeparatorChar; + return currentFolder; + } + + /// + /// Checks if a given path is a full path including drive letter + /// + /// + /// + public static bool IsFullPath(this string path) => Path.IsPathFullyQualified(path); + + /// + /// This will append the query string to the URL + /// + /// + /// + /// + /// + /// This methods ensures that the resulting URL is structured correctly, that there's only one '?' and that things are + /// delimited properly with '&' + /// + public static string AppendQueryStringToUrl(this string url, params string[] queryStrings) + { + // remove any prefixed '&' or '?' + for (var i = 0; i < queryStrings.Length; i++) + { + queryStrings[i] = queryStrings[i].TrimStart(Constants.CharArrays.QuestionMarkAmpersand) + .TrimEnd(Constants.CharArrays.Ampersand); + } + + var nonEmpty = queryStrings.Where(x => !x.IsNullOrWhiteSpace()).ToArray(); + + if (url.Contains('?')) + { + return url + string.Join("&", nonEmpty).EnsureStartsWith('&'); + } + + return url + string.Join("&", nonEmpty).EnsureStartsWith('?'); + } + + /// + /// Converts a file name to a friendly name for a content item + /// + /// + /// + public static string ToFriendlyName(this string fileName) + { + // strip the file extension + fileName = fileName.StripFileExtension(); + + // underscores and dashes to spaces + fileName = fileName.ReplaceMany(Constants.CharArrays.UnderscoreDash, ' '); + + // any other conversions ? + + // Pascalcase (to be done last) + fileName = CultureInfo.InvariantCulture.TextInfo.ToTitleCase(fileName); + + // Replace multiple consecutive spaces with a single space + fileName = string.Join(" ", fileName.Split(Constants.CharArrays.Space, StringSplitOptions.RemoveEmptyEntries)); + + return fileName; + } + + /// + /// Checks whether a string is a valid email address. + /// + /// The string check + /// Returns a bool indicating whether the string is an email address. + public static bool IsEmail(this string? email) => + string.IsNullOrWhiteSpace(email) is false && new EmailAddressAttribute().IsValid(email); + + /// + /// Returns a stream from a string + /// + /// + /// + internal static Stream GenerateStreamFromString(this string s) + { + var stream = new MemoryStream(); + var writer = new StreamWriter(stream); + writer.Write(s); + writer.Flush(); + stream.Position = 0; + return stream; + } +} diff --git a/src/Umbraco.Core/Extensions/StringExtensions.cs b/src/Umbraco.Core/Extensions/StringExtensions.cs deleted file mode 100644 index e887d1131c..0000000000 --- a/src/Umbraco.Core/Extensions/StringExtensions.cs +++ /dev/null @@ -1,1602 +0,0 @@ -// Copyright (c) Umbraco. -// See LICENSE for more details. - -using System.ComponentModel; -using System.ComponentModel.DataAnnotations; -using System.Diagnostics.CodeAnalysis; -using System.Globalization; -using System.Security.Cryptography; -using System.Text; -using System.Text.RegularExpressions; -using Umbraco.Cms.Core; -using Umbraco.Cms.Core.Strings; - -namespace Umbraco.Extensions; - -/// -/// String extension methods -/// -public static class StringExtensions -{ - internal static readonly Lazy Whitespace = new(() => new Regex(@"\s+", RegexOptions.Compiled)); - - private const char DefaultEscapedStringEscapeChar = '\\'; - private static readonly char[] ToCSharpHexDigitLower = "0123456789abcdef".ToCharArray(); - private static readonly char[] ToCSharpEscapeChars; - internal static readonly string[] JsonEmpties = { "[]", "{}" }; - - /// - /// The namespace for URLs (from RFC 4122, Appendix C). - /// See RFC 4122 - /// - internal static readonly Guid UrlNamespace = new("6ba7b811-9dad-11d1-80b4-00c04fd430c8"); - - private static readonly char[] CleanForXssChars = "*?(){}[];:%<>/\\|&'\"".ToCharArray(); - - // From: http://stackoverflow.com/a/961504/5018 - // filters control characters but allows only properly-formed surrogate sequences - private static readonly Lazy InvalidXmlChars = new(() => - new Regex( - @"(? e[0]) + 1]; - foreach (var escape in escapes) - { - ToCSharpEscapeChars[escape[0]] = escape[1]; - } - } - - /// - /// Convert a path to node ids in the order from right to left (deepest to shallowest). - /// - /// The path string expected as a comma delimited collection of integers. - /// An array of integers matching the provided path. - public static int[] GetIdsFromPathReversed(this string path) - { - ReadOnlySpan pathSpan = path.AsSpan(); - - // Using the explicit enumerator (while/MoveNext) over the SpanSplitEnumerator in a foreach loop to avoid any compiler - // boxing of the ref struct enumerator. - // This prevents potential InvalidProgramException across compilers/JITs ("Cannot create boxed ByRef-like values."). - MemoryExtensions.SpanSplitEnumerator pathSegmentsEnumerator = pathSpan.Split(Constants.CharArrays.Comma); - - List nodeIds = []; - while (pathSegmentsEnumerator.MoveNext()) - { - Range rangeOfPathSegment = pathSegmentsEnumerator.Current; - if (int.TryParse(pathSpan[rangeOfPathSegment], NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment)) - { - nodeIds.Add(pathSegment); - } - } - - var result = new int[nodeIds.Count]; - var resultIndex = 0; - for (int i = nodeIds.Count - 1; i >= 0; i--) - { - result[resultIndex++] = nodeIds[i]; - } - - return result; - } - - /// - /// Removes new lines and tabs - /// - /// - /// - public static string StripWhitespace(this string txt) => Regex.Replace(txt, @"\s", string.Empty); - - public static string StripFileExtension(this string fileName) - { - // filenames cannot contain line breaks - if (fileName.Contains('\n') || fileName.Contains('\r')) - { - return fileName; - } - - ReadOnlySpan spanFileName = fileName.AsSpan(); - var lastIndex = spanFileName.LastIndexOf('.'); - if (lastIndex > 0) - { - ReadOnlySpan ext = spanFileName[lastIndex..]; - - // file extensions cannot contain whitespace - if (ext.Contains(' ')) - { - return fileName; - } - - return new string(spanFileName[..lastIndex]); - } - - return fileName; - } - - /// - /// Determines the extension of the path or URL - /// - /// - /// Extension of the file - public static string GetFileExtension(this string file) - { - // Find any characters between the last . and the start of a query string or the end of the string - const string pattern = @"(?\.[^\.\?]+)(\?.*|$)"; - Match match = Regex.Match(file, pattern); - return match.Success - ? match.Groups["extension"].Value - : string.Empty; - } - - /// - /// This tries to detect a json string, this is not a fail safe way but it is quicker than doing - /// a try/catch when deserializing when it is not json. - /// - /// - /// - public static bool DetectIsJson(this string input) - { - if (input.IsNullOrWhiteSpace()) - { - return false; - } - - input = input.Trim(); - return (input[0] is '[' && input[^1] is ']') || (input[0] is '{' && input[^1] is '}'); - } - - public static bool DetectIsEmptyJson(this string input) => - JsonEmpties.Contains(Whitespace.Value.Replace(input, string.Empty)); - - public static string ReplaceNonAlphanumericChars(this string input, string replacement) - { - // any character that is not alphanumeric, convert to a hyphen - var mName = input; - foreach (var c in mName.ToCharArray().Where(c => !char.IsLetterOrDigit(c))) - { - mName = mName.Replace(c.ToString(CultureInfo.InvariantCulture), replacement); - } - - return mName; - } - - public static string ReplaceNonAlphanumericChars(this string input, char replacement) - { - var chars = input.ToCharArray(); - for (var i = 0; i < chars.Length; i++) - { - if (!char.IsLetterOrDigit(chars[i])) - { - chars[i] = replacement; - } - } - - return new string(chars); - } - - /// - /// Cleans string to aid in preventing xss attacks. - /// - /// - /// - /// - public static string CleanForXss(this string input, params char[] ignoreFromClean) - { - // remove any HTML - input = input.StripHtml(); - - // strip out any potential chars involved with XSS - return input.ExceptChars(new HashSet(CleanForXssChars.Except(ignoreFromClean))); - } - - public static string ExceptChars(this string str, HashSet toExclude) - { - var sb = new StringBuilder(str.Length); - foreach (var c in str.Where(c => toExclude.Contains(c) == false)) - { - sb.Append(c); - } - - return sb.ToString(); - } - - /// - /// This will append the query string to the URL - /// - /// - /// - /// - /// - /// This methods ensures that the resulting URL is structured correctly, that there's only one '?' and that things are - /// delimited properly with '&' - /// - public static string AppendQueryStringToUrl(this string url, params string[] queryStrings) - { - // remove any prefixed '&' or '?' - for (var i = 0; i < queryStrings.Length; i++) - { - queryStrings[i] = queryStrings[i].TrimStart(Constants.CharArrays.QuestionMarkAmpersand) - .TrimEnd(Constants.CharArrays.Ampersand); - } - - var nonEmpty = queryStrings.Where(x => !x.IsNullOrWhiteSpace()).ToArray(); - - if (url.Contains('?')) - { - return url + string.Join("&", nonEmpty).EnsureStartsWith('&'); - } - - return url + string.Join("&", nonEmpty).EnsureStartsWith('?'); - } - - /// - /// Returns a stream from a string - /// - /// - /// - internal static Stream GenerateStreamFromString(this string s) - { - var stream = new MemoryStream(); - var writer = new StreamWriter(stream); - writer.Write(s); - writer.Flush(); - stream.Position = 0; - return stream; - } - - // this is from SqlMetal and just makes it a bit of fun to allow pluralization - public static string MakePluralName(this string name) - { - if (name.EndsWith("x", StringComparison.OrdinalIgnoreCase) || - name.EndsWith("ch", StringComparison.OrdinalIgnoreCase) || - name.EndsWith("s", StringComparison.OrdinalIgnoreCase) || - name.EndsWith("sh", StringComparison.OrdinalIgnoreCase)) - { - name += "es"; - return name; - } - - if (name.EndsWith("y", StringComparison.OrdinalIgnoreCase) && name.Length > 1 && - !IsVowel(name[^2])) - { - name = name.Remove(name.Length - 1, 1); - name += "ies"; - return name; - } - - if (!name.EndsWith("s", StringComparison.OrdinalIgnoreCase)) - { - name += "s"; - } - - return name; - } - - public static bool IsVowel(this char c) - { - switch (c) - { - case 'O': - case 'U': - case 'Y': - case 'A': - case 'E': - case 'I': - case 'o': - case 'u': - case 'y': - case 'a': - case 'e': - case 'i': - return true; - } - - return false; - } - - /// - /// Trims the specified value from a string; accepts a string input whereas the in-built implementation only accepts - /// char or char[]. - /// - /// The value. - /// For removing. - /// - public static string Trim(this string value, string forRemoving) - { - if (string.IsNullOrEmpty(value)) - { - return value; - } - - return value.TrimEnd(forRemoving).TrimStart(forRemoving); - } - - public static string EncodeJsString(this string s) - { - var sb = new StringBuilder(); - foreach (var c in s) - { - switch (c) - { - case '\"': - sb.Append("\\\""); - break; - case '\\': - sb.Append("\\\\"); - break; - case '\b': - sb.Append("\\b"); - break; - case '\f': - sb.Append("\\f"); - break; - case '\n': - sb.Append("\\n"); - break; - case '\r': - sb.Append("\\r"); - break; - case '\t': - sb.Append("\\t"); - break; - default: - int i = c; - if (i < 32 || i > 127) - { - sb.AppendFormat("\\u{0:X04}", i); - } - else - { - sb.Append(c); - } - - break; - } - } - - return sb.ToString(); - } - - public static string TrimEnd(this string value, string forRemoving) - { - if (string.IsNullOrEmpty(value)) - { - return value; - } - - if (string.IsNullOrEmpty(forRemoving)) - { - return value; - } - - while (value.EndsWith(forRemoving, StringComparison.InvariantCultureIgnoreCase)) - { - value = value.Remove(value.LastIndexOf(forRemoving, StringComparison.InvariantCultureIgnoreCase)); - } - - return value; - } - - public static string TrimStart(this string value, string forRemoving) - { - if (string.IsNullOrEmpty(value)) - { - return value; - } - - if (string.IsNullOrEmpty(forRemoving)) - { - return value; - } - - while (value.StartsWith(forRemoving, StringComparison.InvariantCultureIgnoreCase)) - { - value = value[forRemoving.Length..]; - } - - return value; - } - - public static string EnsureStartsWith(this string input, string toStartWith) - { - if (input.StartsWith(toStartWith)) - { - return input; - } - - return toStartWith + input.TrimStart(toStartWith); - } - - public static string EnsureStartsWith(this string input, char value) => - input.StartsWith(value.ToString(CultureInfo.InvariantCulture)) ? input : value + input; - - public static string EnsureEndsWith(this string input, char value) => - input.EndsWith(value.ToString(CultureInfo.InvariantCulture)) ? input : input + value; - - public static string EnsureEndsWith(this string input, string toEndWith) => - input.EndsWith(toEndWith.ToString(CultureInfo.InvariantCulture)) ? input : input + toEndWith; - - public static bool IsLowerCase(this char ch) => ch.ToString(CultureInfo.InvariantCulture) == - ch.ToString(CultureInfo.InvariantCulture).ToLowerInvariant(); - - public static bool IsUpperCase(this char ch) => ch.ToString(CultureInfo.InvariantCulture) == - ch.ToString(CultureInfo.InvariantCulture).ToUpperInvariant(); - - /// - /// Indicates whether a specified string is null, empty, or - /// consists only of white-space characters. - /// - /// The value to check. - /// - /// Returns if the value is null, - /// empty, or consists only of white-space characters, otherwise - /// returns . - /// - public static bool IsNullOrWhiteSpace([NotNullWhen(false)] this string? value) => string.IsNullOrWhiteSpace(value); - - [return: NotNullIfNotNull("defaultValue")] - public static string? IfNullOrWhiteSpace(this string? str, string? defaultValue) => - str.IsNullOrWhiteSpace() ? defaultValue : str; - - /// The to delimited list. - /// The list. - /// The delimiter. - /// the list - [SuppressMessage("Microsoft.Design", "CA1026:DefaultParametersShouldNotBeUsed", Justification = "By design")] - public static IList ToDelimitedList(this string list, string delimiter = ",") - { - var delimiters = new[] { delimiter }; - return !list.IsNullOrWhiteSpace() - ? list.Split(delimiters, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries) - .ToList() - : new List(); - } - - /// enum try parse. - /// The str type. - /// The ignore case. - /// The result. - /// The type - /// The enum try parse. - [SuppressMessage("Microsoft.Design", "CA1031:DoNotCatchGeneralExceptionTypes", Justification = "By Design")] - [SuppressMessage("Microsoft.Design", "CA1021:AvoidOutParameters", MessageId = "2#", Justification = "By Design")] - public static bool EnumTryParse(this string strType, bool ignoreCase, out T? result) - { - try - { - result = (T)Enum.Parse(typeof(T), strType, ignoreCase); - return true; - } - catch - { - result = default; - return false; - } - } - - /// - /// Parse string to Enum - /// - /// The enum type - /// The string to parse - /// The ignore case - /// The parsed enum - [SuppressMessage("Microsoft.Design", "CA1031:DoNotCatchGeneralExceptionTypes", Justification = "By Design")] - [SuppressMessage("Microsoft.Design", "CA1021:AvoidOutParameters", MessageId = "2#", Justification = "By Design")] - public static T EnumParse(this string strType, bool ignoreCase) => (T)Enum.Parse(typeof(T), strType, ignoreCase); - - /// - /// Strips all HTML from a string. - /// - /// The text. - /// Returns the string without any HTML tags. - public static string StripHtml(this string text) - { - const string pattern = @"<(.|\n)*?>"; - return Regex.Replace(text, pattern, string.Empty, RegexOptions.Compiled); - } - - /// - /// Encodes as GUID. - /// - /// The input. - /// - public static Guid EncodeAsGuid(this string input) - { - if (string.IsNullOrWhiteSpace(input)) - { - throw new ArgumentNullException("input"); - } - - var convertToHex = input.ConvertToHex(); - var hexLength = convertToHex.Length < 32 ? convertToHex.Length : 32; - var hex = convertToHex[..hexLength].PadLeft(32, '0'); - return Guid.TryParse(hex, out Guid output) ? output : Guid.Empty; - } - - /// - /// Converts to hex. - /// - /// The input. - /// - public static string ConvertToHex(this string input) - { - if (string.IsNullOrEmpty(input)) - { - return string.Empty; - } - - var sb = new StringBuilder(input.Length); - foreach (var c in input) - { - sb.AppendFormat("{0:x2}", Convert.ToUInt32(c)); - } - - return sb.ToString(); - } - - public static string DecodeFromHex(this string hexValue) - { - var strValue = string.Empty; - while (hexValue.Length > 0) - { - strValue += Convert.ToChar(Convert.ToUInt32(hexValue[..2], 16)).ToString(); - hexValue = hexValue[2..]; - } - - return strValue; - } - - /// - /// Encodes a string to a safe URL base64 string - /// - /// - /// - public static string ToUrlBase64(this string input) - { - if (input == null) - { - throw new ArgumentNullException(nameof(input)); - } - - if (string.IsNullOrEmpty(input)) - { - return string.Empty; - } - - // return Convert.ToBase64String(bytes).Replace(".", "-").Replace("/", "_").Replace("=", ","); - var bytes = Encoding.UTF8.GetBytes(input); - return UrlTokenEncode(bytes); - } - - /// - /// Decodes a URL safe base64 string back - /// - /// - /// - public static string? FromUrlBase64(this string input) - { - if (input == null) - { - throw new ArgumentNullException(nameof(input)); - } - - // if (input.IsInvalidBase64()) return null; - try - { - // var decodedBytes = Convert.FromBase64String(input.Replace("-", ".").Replace("_", "/").Replace(",", "=")); - var decodedBytes = UrlTokenDecode(input); - return decodedBytes != null ? Encoding.UTF8.GetString(decodedBytes) : null; - } - catch (FormatException) - { - return null; - } - } - - /// - /// formats the string with invariant culture - /// - /// The format. - /// The args. - /// - public static string InvariantFormat(this string? format, params object?[] args) => - string.Format(CultureInfo.InvariantCulture, format ?? string.Empty, args); - - /// - /// Converts an integer to an invariant formatted string - /// - /// - /// - public static string ToInvariantString(this int str) => str.ToString(CultureInfo.InvariantCulture); - - public static string ToInvariantString(this long str) => str.ToString(CultureInfo.InvariantCulture); - - /// - /// Compares 2 strings with invariant culture and case ignored - /// - /// The compare. - /// The compare to. - /// - public static bool InvariantEquals(this string? compare, string? compareTo) => - string.Equals(compare, compareTo, StringComparison.InvariantCultureIgnoreCase); - - public static bool InvariantStartsWith(this string compare, string compareTo) => - compare.StartsWith(compareTo, StringComparison.InvariantCultureIgnoreCase); - - public static bool InvariantEndsWith(this string compare, string compareTo) => - compare.EndsWith(compareTo, StringComparison.InvariantCultureIgnoreCase); - - public static bool InvariantContains(this string compare, string compareTo) => - compare.Contains(compareTo, StringComparison.OrdinalIgnoreCase); - - public static bool InvariantContains(this IEnumerable compare, string compareTo) => - compare.Contains(compareTo, StringComparer.InvariantCultureIgnoreCase); - - public static int InvariantIndexOf(this string s, string value) => - s.IndexOf(value, StringComparison.OrdinalIgnoreCase); - - public static int InvariantLastIndexOf(this string s, string value) => - s.LastIndexOf(value, StringComparison.OrdinalIgnoreCase); - - /// - /// Tries to parse a string into the supplied type by finding and using the Type's "Parse" method - /// - /// - /// - /// - public static T? ParseInto(this string val) => (T?)val.ParseInto(typeof(T)); - - /// - /// Tries to parse a string into the supplied type by finding and using the Type's "Parse" method - /// - /// - /// - /// - public static object? ParseInto(this string val, Type type) - { - if (string.IsNullOrEmpty(val) == false) - { - TypeConverter tc = TypeDescriptor.GetConverter(type); - return tc.ConvertFrom(val); - } - - return val; - } - - /// - /// Generates a hash of a string based on the FIPS compliance setting. - /// - /// Refers to itself - /// The hashed string - public static string GenerateHash(this string str) => str.ToSHA1(); - - /// - /// Generate a hash of a string based on the specified hash algorithm. - /// - /// The hash algorithm implementation to use. - /// The to hash. - /// - /// The hashed string. - /// - public static string GenerateHash(this string str) - where T : HashAlgorithm => str.GenerateHash(typeof(T).FullName); - - /// - /// Converts the string to SHA1 - /// - /// refers to itself - /// The SHA1 hashed string - public static string ToSHA1(this string stringToConvert) => stringToConvert.GenerateHash("SHA1"); - - /// - /// Decodes a string that was encoded with UrlTokenEncode - /// - /// - /// - public static byte[] UrlTokenDecode(this string input) - { - if (input == null) - { - throw new ArgumentNullException(nameof(input)); - } - - if (input.Length == 0) - { - return []; - } - - // calc array size - must be groups of 4 - var arrayLength = input.Length; - var remain = arrayLength % 4; - if (remain != 0) - { - arrayLength += 4 - remain; - } - - var inArray = new char[arrayLength]; - for (var i = 0; i < input.Length; i++) - { - var ch = input[i]; - switch (ch) - { - case '-': // restore '-' as '+' - inArray[i] = '+'; - break; - - case '_': // restore '_' as '/' - inArray[i] = '/'; - break; - - default: // keep char unchanged - inArray[i] = ch; - break; - } - } - - // pad with '=' - for (var j = input.Length; j < inArray.Length; j++) - { - inArray[j] = '='; - } - - return Convert.FromBase64CharArray(inArray, 0, inArray.Length); - } - - /// - /// Generate a hash of a string based on the hashType passed in - /// - /// Refers to itself - /// - /// String with the hash type. See remarks section of the CryptoConfig Class in MSDN docs for a - /// list of possible values. - /// - /// The hashed string - private static string GenerateHash(this string str, string? hashType) - { - HashAlgorithm? hasher = null; - - // create an instance of the correct hashing provider based on the type passed in - if (hashType is not null) - { - hasher = HashAlgorithm.Create(hashType); - } - - if (hasher == null) - { - throw new InvalidOperationException("No hashing type found by name " + hashType); - } - - using (hasher) - { - // convert our string into byte array - var byteArray = Encoding.UTF8.GetBytes(str); - - // get the hashed values created by our selected provider - var hashedByteArray = hasher.ComputeHash(byteArray); - - // create a StringBuilder object - var stringBuilder = new StringBuilder(); - - // loop to each byte - foreach (var b in hashedByteArray) - { - // append it to our StringBuilder - stringBuilder.Append(b.ToString("x2")); - } - - // return the hashed value - return stringBuilder.ToString(); - } - } - - /// - /// Encodes a string so that it is 'safe' for URLs, files, etc.. - /// - /// - /// - public static string UrlTokenEncode(this byte[] input) - { - if (input == null) - { - throw new ArgumentNullException(nameof(input)); - } - - if (input.Length == 0) - { - return string.Empty; - } - - // base-64 digits are A-Z, a-z, 0-9, + and / - // the = char is used for trailing padding - var str = Convert.ToBase64String(input); - - var pos = str.IndexOf('='); - if (pos < 0) - { - pos = str.Length; - } - - // replace chars that would cause problems in URLs - Span chArray = pos <= 1024 ? stackalloc char[pos] : new char[pos]; - for (var i = 0; i < pos; i++) - { - var ch = str[i]; - switch (ch) - { - case '+': // replace '+' with '-' - chArray[i] = '-'; - break; - - case '/': // replace '/' with '_' - chArray[i] = '_'; - break; - - default: // keep char unchanged - chArray[i] = ch; - break; - } - } - - return new string(chArray); - } - - /// - /// Ensures that the folder path ends with a DirectorySeparatorChar - /// - /// - /// - public static string NormaliseDirectoryPath(this string currentFolder) - { - currentFolder = currentFolder - .IfNull(x => string.Empty) - .TrimEnd(Path.DirectorySeparatorChar) + Path.DirectorySeparatorChar; - return currentFolder; - } - - /// - /// Truncates the specified text string. - /// - /// The text. - /// Length of the max. - /// The suffix. - /// - public static string Truncate(this string text, int maxLength, string suffix = "...") - { - // replaces the truncated string to a ... - var truncatedString = text; - - if (maxLength <= 0) - { - return truncatedString; - } - - var strLength = maxLength - suffix.Length; - - if (strLength <= 0) - { - return truncatedString; - } - - if (text == null || text.Length <= maxLength) - { - return truncatedString; - } - - truncatedString = text[..strLength]; - truncatedString = truncatedString.TrimEnd(); - truncatedString += suffix; - - return truncatedString; - } - - /// - /// Strips carrage returns and line feeds from the specified text. - /// - /// The input. - /// - public static string StripNewLines(this string input) => input.Replace("\r", string.Empty).Replace("\n", string.Empty); - - /// - /// Converts to single line by replacing line breaks with spaces. - /// - public static string ToSingleLine(this string text) - { - if (string.IsNullOrEmpty(text)) - { - return text; - } - - text = text.Replace("\r\n", " "); // remove CRLF - text = text.Replace("\r", " "); // remove CR - text = text.Replace("\n", " "); // remove LF - return text; - } - - [return: NotNullIfNotNull(nameof(alternative))] - public static string? OrIfNullOrWhiteSpace(this string? input, string? alternative) => - !string.IsNullOrWhiteSpace(input) - ? input - : alternative; - - /// - /// Returns a copy of the string with the first character converted to uppercase. - /// - /// The string. - /// The converted string. - public static string ToFirstUpper(this string input) => - string.IsNullOrWhiteSpace(input) - ? input - : input[..1].ToUpper() + input[1..]; - - /// - /// Returns a copy of the string with the first character converted to lowercase. - /// - /// The string. - /// The converted string. - public static string ToFirstLower(this string input) => - string.IsNullOrWhiteSpace(input) - ? input - : input[..1].ToLower() + input[1..]; - - /// - /// Returns a copy of the string with the first character converted to uppercase using the casing rules of the - /// specified culture. - /// - /// The string. - /// The culture. - /// The converted string. - public static string ToFirstUpper(this string input, CultureInfo culture) => - string.IsNullOrWhiteSpace(input) - ? input - : input[..1].ToUpper(culture) + input[1..]; - - /// - /// Returns a copy of the string with the first character converted to lowercase using the casing rules of the - /// specified culture. - /// - /// The string. - /// The culture. - /// The converted string. - public static string ToFirstLower(this string input, CultureInfo culture) => - string.IsNullOrWhiteSpace(input) - ? input - : input[..1].ToLower(culture) + input[1..]; - - /// - /// Returns a copy of the string with the first character converted to uppercase using the casing rules of the - /// invariant culture. - /// - /// The string. - /// The converted string. - public static string ToFirstUpperInvariant(this string input) => - string.IsNullOrWhiteSpace(input) - ? input - : input[..1].ToUpperInvariant() + input[1..]; - - /// - /// Returns a copy of the string with the first character converted to lowercase using the casing rules of the - /// invariant culture. - /// - /// The string. - /// The converted string. - public static string ToFirstLowerInvariant(this string input) => - string.IsNullOrWhiteSpace(input) - ? input - : input[..1].ToLowerInvariant() + input[1..]; - - /// - /// Returns a new string in which all occurrences of specified strings are replaced by other specified strings. - /// - /// The string to filter. - /// The replacements definition. - /// The filtered string. - public static string ReplaceMany(this string text, IDictionary replacements) - { - if (text == null) - { - throw new ArgumentNullException(nameof(text)); - } - - if (replacements == null) - { - throw new ArgumentNullException(nameof(replacements)); - } - - foreach (KeyValuePair item in replacements) - { - text = text.Replace(item.Key, item.Value); - } - - return text; - } - - /// - /// Returns a new string in which all occurrences of specified characters are replaced by a specified character. - /// - /// The string to filter. - /// The characters to replace. - /// The replacement character. - /// The filtered string. - public static string ReplaceMany(this string text, char[] chars, char replacement) - { - if (text == null) - { - throw new ArgumentNullException(nameof(text)); - } - - if (chars == null) - { - throw new ArgumentNullException(nameof(chars)); - } - - for (var i = 0; i < chars.Length; i++) - { - text = text.Replace(chars[i], replacement); - } - - return text; - } - - /// - /// Returns a new string in which only the first occurrence of a specified string is replaced by a specified - /// replacement string. - /// - /// The string to filter. - /// The string to replace. - /// The replacement string. - /// The filtered string. - public static string ReplaceFirst(this string text, string search, string replace) - { - if (text == null) - { - throw new ArgumentNullException(nameof(text)); - } - - ReadOnlySpan spanText = text.AsSpan(); - var pos = spanText.IndexOf(search, StringComparison.InvariantCulture); - - if (pos < 0) - { - return text; - } - - return string.Concat(spanText[..pos], replace.AsSpan(), spanText[(pos + search.Length)..]); - } - - /// - /// An extension method that returns a new string in which all occurrences of a - /// specified string in the current instance are replaced with another specified string. - /// StringComparison specifies the type of search to use for the specified string. - /// - /// Current instance of the string - /// Specified string to replace - /// Specified string to inject - /// String Comparison object to specify search type - /// Updated string - public static string Replace(this string source, string oldString, string newString, StringComparison stringComparison) - { - // This initialization ensures the first check starts at index zero of the source. On successive checks for - // a match, the source is skipped to immediately after the last replaced occurrence for efficiency - // and to avoid infinite loops when oldString and newString compare equal. - var index = -1 * newString.Length; - - // Determine if there are any matches left in source, starting from just after the result of replacing the last match. - while ((index = source.IndexOf(oldString, index + newString.Length, stringComparison)) >= 0) - { - // Remove the old text. - source = source.Remove(index, oldString.Length); - - // Add the replacement text. - source = source.Insert(index, newString); - } - - return source; - } - - /// - /// Converts a literal string into a C# expression. - /// - /// Current instance of the string. - /// The string in a C# format. - public static string ToCSharpString(this string s) - { - if (s == null) - { - return ""; - } - - // http://stackoverflow.com/questions/323640/can-i-convert-a-c-sharp-string-value-to-an-escaped-string-literal - var sb = new StringBuilder(s.Length + 2); - for (var rp = 0; rp < s.Length; rp++) - { - var c = s[rp]; - if (c < ToCSharpEscapeChars.Length && ToCSharpEscapeChars[c] != '\0') - { - sb.Append('\\').Append(ToCSharpEscapeChars[c]); - } - else if (c <= '~' && c >= ' ') - { - sb.Append(c); - } - else - { - sb.Append(@"\x") - .Append(ToCSharpHexDigitLower[(c >> 12) & 0x0F]) - .Append(ToCSharpHexDigitLower[(c >> 8) & 0x0F]) - .Append(ToCSharpHexDigitLower[(c >> 4) & 0x0F]) - .Append(ToCSharpHexDigitLower[c & 0x0F]); - } - } - - return sb.ToString(); - - // requires full trust - /* - using (var writer = new StringWriter()) - using (var provider = CodeDomProvider.CreateProvider("CSharp")) - { - provider.GenerateCodeFromExpression(new CodePrimitiveExpression(s), writer, null); - return writer.ToString().Replace(string.Format("\" +{0}\t\"", Environment.NewLine), ""); - } - */ - } - - public static string EscapeRegexSpecialCharacters(this string text) - { - var regexSpecialCharacters = new Dictionary - { - { ".", @"\." }, - { "(", @"\(" }, - { ")", @"\)" }, - { "]", @"\]" }, - { "[", @"\[" }, - { "{", @"\{" }, - { "}", @"\}" }, - { "?", @"\?" }, - { "!", @"\!" }, - { "$", @"\$" }, - { "^", @"\^" }, - { "+", @"\+" }, - { "*", @"\*" }, - { "|", @"\|" }, - { "<", @"\<" }, - { ">", @"\>" }, - }; - return ReplaceMany(text, regexSpecialCharacters); - } - - /// - /// Checks whether a string "haystack" contains within it any of the strings in the "needles" collection and returns - /// true if it does or false if it doesn't - /// - /// The string to check - /// The collection of strings to check are contained within the first string - /// - /// The type of comparison to perform - defaults to - /// - /// True if any of the needles are contained with haystack; otherwise returns false - /// Added fix to ensure the comparison is used - see http://issues.umbraco.org/issue/U4-11313 - public static bool ContainsAny(this string haystack, IEnumerable needles, StringComparison comparison = StringComparison.CurrentCulture) - { - if (haystack == null) - { - throw new ArgumentNullException("haystack"); - } - - if (string.IsNullOrEmpty(haystack) || needles == null || !needles.Any()) - { - return false; - } - - return needles.Any(value => haystack.IndexOf(value, comparison) >= 0); - } - - public static bool CsvContains(this string csv, string value) - { - if (string.IsNullOrEmpty(csv)) - { - return false; - } - - var idCheckList = csv.Split(Constants.CharArrays.Comma, StringSplitOptions.RemoveEmptyEntries); - return idCheckList.Contains(value); - } - - /// - /// Converts a file name to a friendly name for a content item - /// - /// - /// - public static string ToFriendlyName(this string fileName) - { - // strip the file extension - fileName = fileName.StripFileExtension(); - - // underscores and dashes to spaces - fileName = fileName.ReplaceMany(Constants.CharArrays.UnderscoreDash, ' '); - - // any other conversions ? - - // Pascalcase (to be done last) - fileName = CultureInfo.InvariantCulture.TextInfo.ToTitleCase(fileName); - - // Replace multiple consecutive spaces with a single space - fileName = string.Join(" ", fileName.Split(Constants.CharArrays.Space, StringSplitOptions.RemoveEmptyEntries)); - - return fileName; - } - - /// - /// An extension method that returns a new string in which all occurrences of an - /// unicode characters that are invalid in XML files are replaced with an empty string. - /// - /// Current instance of the string - /// Updated string - /// - /// removes any unusual unicode characters that can't be encoded into XML - /// - public static string ToValidXmlString(this string text) => - string.IsNullOrEmpty(text) ? text : InvalidXmlChars.Value.Replace(text, string.Empty); - - /// - /// Converts a string to a Guid - WARNING, depending on the string, this may not be unique - /// - /// - /// - public static Guid ToGuid(this string text) => - CreateGuidFromHash( - UrlNamespace, - text, - CryptoConfig.AllowOnlyFipsAlgorithms ? 5 // SHA1 - : 3); // MD5 - - /// - /// Turns an null-or-whitespace string into a null string. - /// - public static string? NullOrWhiteSpaceAsNull(this string? text) - => string.IsNullOrWhiteSpace(text) ? null : text; - - /// - /// Creates a name-based UUID using the algorithm from RFC 4122 §4.3. - /// See - /// GuidUtility.cs - /// for original implementation. - /// - /// The ID of the namespace. - /// The name (within that namespace). - /// - /// The version number of the UUID to create; this value must be either - /// 3 (for MD5 hashing) or 5 (for SHA-1 hashing). - /// - /// A UUID derived from the namespace and name. - /// - /// See - /// Generating a deterministic GUID - /// . - /// - internal static Guid CreateGuidFromHash(Guid namespaceId, string name, int version) - { - if (name == null) - { - throw new ArgumentNullException("name"); - } - - if (version != 3 && version != 5) - { - throw new ArgumentOutOfRangeException("version", "version must be either 3 or 5."); - } - - // convert the name to a sequence of octets (as defined by the standard or conventions of its namespace) (step 3) - // ASSUME: UTF-8 encoding is always appropriate - var nameBytes = Encoding.UTF8.GetBytes(name); - - // convert the namespace UUID to network order (step 3) - var namespaceBytes = namespaceId.ToByteArray(); - SwapByteOrder(namespaceBytes); - - // comput the hash of the name space ID concatenated with the name (step 4) - byte[] hash; - using (HashAlgorithm algorithm = version == 3 ? MD5.Create() : SHA1.Create()) - { - algorithm.TransformBlock(namespaceBytes, 0, namespaceBytes.Length, null, 0); - algorithm.TransformFinalBlock(nameBytes, 0, nameBytes.Length); - hash = algorithm.Hash!; - } - - // most bytes from the hash are copied straight to the bytes of the new GUID (steps 5-7, 9, 11-12) - Span newGuid = hash.AsSpan()[..16]; - - // set the four most significant bits (bits 12 through 15) of the time_hi_and_version field to the appropriate 4-bit version number from Section 4.1.3 (step 8) - newGuid[6] = (byte)((newGuid[6] & 0x0F) | (version << 4)); - - // set the two most significant bits (bits 6 and 7) of the clock_seq_hi_and_reserved to zero and one, respectively (step 10) - newGuid[8] = (byte)((newGuid[8] & 0x3F) | 0x80); - - // convert the resulting UUID to local byte order (step 13) - SwapByteOrder(newGuid); - return new Guid(newGuid); - } - - // Converts a GUID (expressed as a byte array) to/from network order (MSB-first). - internal static void SwapByteOrder(Span guid) - { - SwapBytes(guid, 0, 3); - SwapBytes(guid, 1, 2); - SwapBytes(guid, 4, 5); - SwapBytes(guid, 6, 7); - } - - private static void SwapBytes(Span guid, int left, int right) => (guid[left], guid[right]) = (guid[right], guid[left]); - - /// - /// Checks if a given path is a full path including drive letter - /// - /// - /// - public static bool IsFullPath(this string path) => Path.IsPathFullyQualified(path); - - // FORMAT STRINGS - - /// - /// Cleans a string to produce a string that can safely be used in an alias. - /// - /// The text to filter. - /// The short string helper. - /// The safe alias. - public static string ToSafeAlias(this string alias, IShortStringHelper? shortStringHelper) => - shortStringHelper?.CleanStringForSafeAlias(alias) ?? string.Empty; - - /// - /// Cleans a string to produce a string that can safely be used in an alias. - /// - /// The text to filter. - /// A value indicating that we want to camel-case the alias. - /// The short string helper. - /// The safe alias. - public static string ToSafeAlias(this string alias, IShortStringHelper shortStringHelper, bool camel) - { - var a = shortStringHelper.CleanStringForSafeAlias(alias); - if (string.IsNullOrWhiteSpace(a) || camel == false) - { - return a; - } - - return char.ToLowerInvariant(a[0]) + a[1..]; - } - - /// - /// Cleans a string, in the context of a specified culture, to produce a string that can safely be used in an alias. - /// - /// The text to filter. - /// The culture. - /// The short string helper. - /// The safe alias. - public static string ToSafeAlias(this string alias, IShortStringHelper shortStringHelper, string culture) => - shortStringHelper.CleanStringForSafeAlias(alias, culture); - - // the new methods to get a url segment - - /// - /// Cleans a string to produce a string that can safely be used in an url segment. - /// - /// The text to filter. - /// The short string helper. - /// The safe url segment. - public static string ToUrlSegment(this string text, IShortStringHelper shortStringHelper) - { - if (text == null) - { - throw new ArgumentNullException(nameof(text)); - } - - if (string.IsNullOrWhiteSpace(text)) - { - throw new ArgumentException( - "Value can't be empty or consist only of white-space characters.", - nameof(text)); - } - - return shortStringHelper.CleanStringForUrlSegment(text); - } - - /// - /// Cleans a string, in the context of a specified culture, to produce a string that can safely be used in an url - /// segment. - /// - /// The text to filter. - /// The short string helper. - /// The culture. - /// The safe url segment. - public static string ToUrlSegment(this string text, IShortStringHelper shortStringHelper, string? culture) - { - if (text == null) - { - throw new ArgumentNullException(nameof(text)); - } - - if (string.IsNullOrWhiteSpace(text)) - { - throw new ArgumentException( - "Value can't be empty or consist only of white-space characters.", - nameof(text)); - } - - return shortStringHelper.CleanStringForUrlSegment(text, culture); - } - - /// - /// Cleans a string. - /// - /// The text to clean. - /// The short string helper. - /// - /// A flag indicating the target casing and encoding of the string. By default, - /// strings are cleaned up to camelCase and Ascii. - /// - /// The clean string. - /// The string is cleaned in the context of the ICurrent.ShortStringHelper default culture. - public static string ToCleanString(this string text, IShortStringHelper shortStringHelper, CleanStringType stringType) => shortStringHelper.CleanString(text, stringType); - - /// - /// Cleans a string, using a specified separator. - /// - /// The text to clean. - /// The short string helper. - /// - /// A flag indicating the target casing and encoding of the string. By default, - /// strings are cleaned up to camelCase and Ascii. - /// - /// The separator. - /// The clean string. - /// The string is cleaned in the context of the ICurrent.ShortStringHelper default culture. - public static string ToCleanString(this string text, IShortStringHelper shortStringHelper, CleanStringType stringType, char separator) => shortStringHelper.CleanString(text, stringType, separator); - - /// - /// Cleans a string in the context of a specified culture. - /// - /// The text to clean. - /// The short string helper. - /// - /// A flag indicating the target casing and encoding of the string. By default, - /// strings are cleaned up to camelCase and Ascii. - /// - /// The culture. - /// The clean string. - public static string ToCleanString(this string text, IShortStringHelper shortStringHelper, CleanStringType stringType, string culture) => shortStringHelper.CleanString(text, stringType, culture); - - /// - /// Cleans a string in the context of a specified culture, using a specified separator. - /// - /// The text to clean. - /// The short string helper. - /// - /// A flag indicating the target casing and encoding of the string. By default, - /// strings are cleaned up to camelCase and Ascii. - /// - /// The separator. - /// The culture. - /// The clean string. - public static string ToCleanString(this string text, IShortStringHelper shortStringHelper, CleanStringType stringType, char separator, string culture) => - shortStringHelper.CleanString(text, stringType, separator, culture); - - // note: LegacyCurrent.ShortStringHelper will produce 100% backward-compatible output for SplitPascalCasing. - // other helpers may not. DefaultCurrent.ShortStringHelper produces better, but non-compatible, results. - - /// - /// Splits a Pascal cased string into a phrase separated by spaces. - /// - /// The text to split. - /// - /// The split text. - public static string SplitPascalCasing(this string phrase, IShortStringHelper shortStringHelper) => - shortStringHelper.SplitPascalCasing(phrase, ' '); - - /// - /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a - /// filename, - /// both internally (on disk) and externally (as a url). - /// - /// The text to filter. - /// - /// The safe filename. - public static string ToSafeFileName(this string text, IShortStringHelper shortStringHelper) => - shortStringHelper.CleanStringForSafeFileName(text); - - // NOTE: Not sure what this actually does but is used a few places, need to figure it out and then move to StringExtensions and obsolete. - // it basically is yet another version of SplitPascalCasing - // plugging string extensions here to be 99% compatible - // the only diff. is with numbers, Number6Is was "Number6 Is", and the new string helper does it too, - // but the legacy one does "Number6Is"... assuming it is not a big deal. - internal static string SpaceCamelCasing(this string phrase, IShortStringHelper shortStringHelper) => - phrase.Length < 2 ? phrase : phrase.SplitPascalCasing(shortStringHelper).ToFirstUpperInvariant(); - - /// - /// Cleans a string, in the context of the invariant culture, to produce a string that can safely be used as a - /// filename, - /// both internally (on disk) and externally (as a url). - /// - /// The text to filter. - /// - /// The culture. - /// The safe filename. - public static string ToSafeFileName(this string text, IShortStringHelper shortStringHelper, string culture) => - shortStringHelper.CleanStringForSafeFileName(text, culture); - - /// - /// Splits a string with an escape character that allows for the split character to exist in a string - /// - /// The string to split - /// The character to split on - /// The character which can be used to escape the character to split on - /// The string split into substrings delimited by the split character - public static IEnumerable EscapedSplit(this string value, char splitChar, char escapeChar = DefaultEscapedStringEscapeChar) - { - if (value == null) - { - yield break; - } - - var sb = new StringBuilder(value.Length); - var escaped = false; - - foreach (var chr in value.ToCharArray()) - { - if (escaped) - { - escaped = false; - sb.Append(chr); - } - else if (chr == splitChar) - { - yield return sb.ToString(); - sb.Clear(); - } - else if (chr == escapeChar) - { - escaped = true; - } - else - { - sb.Append(chr); - } - } - - yield return sb.ToString(); - } - - /// - /// Checks whether a string is a valid email address. - /// - /// The string check - /// Returns a bool indicating whether the string is an email address. - public static bool IsEmail(this string? email) => - string.IsNullOrWhiteSpace(email) is false && new EmailAddressAttribute().IsValid(email); - - // having benchmarked various solutions (incl. for/foreach, split and LINQ based ones), - // this is by far the fastest way to find string needles in a string haystack - public static int CountOccurrences(this string haystack, string needle) - => haystack.Length - haystack.Replace(needle, string.Empty).Length; - - /// - /// Verifies the provided string is a valid culture code and returns it in a consistent casing. - /// - /// Culture code. - /// Culture code in standard casing. - public static string? EnsureCultureCode(this string? culture) - { - if (string.IsNullOrEmpty(culture) || culture == "*") - { - return culture; - } - - // Create as CultureInfo instance from provided name so we can ensure consistent casing of culture code when persisting. - // This will accept mixed case but once created have a `Name` property that is consistently and correctly cased. - // Will throw in an invalid culture code is provided. - return new CultureInfo(culture).Name; - } -}