// Reconstructed from commit d148c10c66962873a93ebe1bf91b38da69de64df ("Update to sanitizer"),
// which patches src/umbraco.cms/helpers/xhtml.cs (namespace umbraco.cms.helpers).

// helper method gotten from:
// http://stackoverflow.com/questions/20762/how-do-you-remove-invalid-hexadecimal-characters-from-an-xml-based-data-source-p#comment8130028_641632
/// <summary>
/// Returns a copy of <paramref name="inString"/> with every UTF-16 code unit that is not a
/// legal XML 1.0 character removed.
/// </summary>
/// <remarks>
/// Kept: tab (U+0009), line feed (U+000A), carriage return (U+000D), U+0020 to U+D7FF,
/// U+E000 to U+FFFD, and well-formed surrogate pairs (a high surrogate immediately followed
/// by a low surrogate, which together encode U+10000 to U+10FFFF).
/// Dropped: all other control characters below U+0020, unpaired surrogates, U+FFFE and U+FFFF.
/// </remarks>
/// <param name="inString">The text to sanitize. May be null.</param>
/// <returns>
/// The sanitized text, or null when <paramref name="inString"/> is null.
/// </returns>
public static string RemoveTroublesomeCharacters(string inString)
{
    if (inString == null)
    {
        return null;
    }

    // The output can never be longer than the input, so size the buffer once up front.
    StringBuilder sbOutput = new StringBuilder(inString.Length);

    for (int i = 0; i < inString.Length; i++)
    {
        char ch = inString[i];

        if ((ch >= 0x0020 && ch <= 0xD7FF) ||
            (ch >= 0xE000 && ch <= 0xFFFD) ||
            ch == 0x0009 ||
            ch == 0x000A ||
            ch == 0x000D)
        {
            sbOutput.Append(ch);
        }
        else if (char.IsHighSurrogate(ch) &&
                 i + 1 < inString.Length &&
                 char.IsLowSurrogate(inString[i + 1]))
        {
            // A well-formed surrogate pair is a single supplementary-plane character,
            // which XML 1.0 allows. Keep both halves and step over the low half so it
            // is not re-examined (and dropped) as a lone surrogate on the next pass.
            sbOutput.Append(ch);
            sbOutput.Append(inString[i + 1]);
            i++;
        }
        // Anything else (other control characters, lone surrogates, U+FFFE, U+FFFF) is dropped.
    }

    return sbOutput.ToString();
}