using System; using System.Collections; using System.Text; using System.Text.RegularExpressions; using System.IO; using Umbraco.Core.Configuration; namespace umbraco.cms.helpers { /// /// Summary description for xhtml. /// public class xhtml { public xhtml() { // // TODO: Add constructor logic here // } public static string TidyHtml(string html) { TidyNet.Tidy tidy = new TidyNet.Tidy(); /* Set the options you want */ tidy.Options.DocType = TidyNet.DocType.Strict; tidy.Options.DropFontTags = true; tidy.Options.LogicalEmphasis = true; if (GlobalSettings.EditXhtmlMode == "true") { tidy.Options.Xhtml = true; tidy.Options.XmlOut = true; } else { tidy.Options.XmlOut = false; tidy.Options.Xhtml = false; } tidy.Options.MakeClean = true; tidy.Options.TidyMark = false; // To avoid entity encoding tidy.Options.CharEncoding = (TidyNet.CharEncoding)Enum.Parse(typeof(TidyNet.CharEncoding), UmbracoConfig.For.UmbracoSettings().Content.TidyCharEncoding); /* Declare the parameters that is needed */ TidyNet.TidyMessageCollection tmc = new TidyNet.TidyMessageCollection(); MemoryStream input = new MemoryStream(); MemoryStream output = new MemoryStream(); byte[] byteArray = System.Text.Encoding.UTF8.GetBytes(html); input.Write(byteArray, 0, byteArray.Length); input.Position = 0; tidy.Parse(input, output, tmc); string tidyed = System.Text.Encoding.UTF8.GetString(output.ToArray()); // only return body string regex = @"]*>"; System.Text.RegularExpressions.RegexOptions options = ((System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace | System.Text.RegularExpressions.RegexOptions.Multiline) | System.Text.RegularExpressions.RegexOptions.IgnoreCase); System.Text.RegularExpressions.Regex reg = new System.Text.RegularExpressions.Regex(regex, options); string[] s = reg.Split(tidyed); if (s.Length > 1) return s[1]; else return "[tidy error]"; } public static string BootstrapTidy(string html) { string emptyTags = ",br,hr,input,img,"; string regex = "(<[^\\?][^(>| )]*>)|<([^\\?][^(>| )]*)([^>]*)>"; Hashtable replaceTag = new Hashtable(); replaceTag.Add("strong", "b"); replaceTag.Add("em", "i"); System.Text.RegularExpressions.RegexOptions options = ((System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace | System.Text.RegularExpressions.RegexOptions.Multiline) | System.Text.RegularExpressions.RegexOptions.IgnoreCase); System.Text.RegularExpressions.Regex reg = new System.Text.RegularExpressions.Regex(regex, options); foreach (Match m in reg.Matches(html)) { string orgTag = ""; string tag = ""; string cleanTag = ""; if (m.Groups.Count < 2 || (m.Groups[2].Value.ToLower() != "img" || (m.Groups[2].Value.ToLower() == "img" && m.Value.IndexOf ("?UMBRACO_MACRO") == -1))) { if (m.Groups[1].Value != "") { orgTag = m.Groups[1].Value; cleanTag = replaceTags(m.Groups[1].Value.ToLower().Replace("<", "").Replace("/>", "").Replace(">", "").Trim(), replaceTag); tag = "<" + cleanTag + ">"; } else { orgTag = "<" + m.Groups[2].Value + m.Groups[3].Value + ">"; // loop through the attributes and make them lowercase cleanTag = replaceTags(m.Groups[2].Value.ToLower(), replaceTag); tag = "<" + cleanTag + returnLowerCaseAttributes(m.Groups[3].Value) + ">"; } // Check for empty tags if (bool.Parse(GlobalSettings.EditXhtmlMode) && emptyTags.IndexOf(","+cleanTag+",") > -1 && tag.IndexOf("/>") == -1) tag = tag.Replace(">", " />"); html = html.Replace(orgTag, tag); } } return html; } private static string replaceTags(string tag, Hashtable replaceTag) { string closeBracket = ""; if (tag.Substring(0,1) == "/") { closeBracket = "/"; tag = tag.Substring(1, tag.Length-1); } if (replaceTag.ContainsKey(tag)) return closeBracket+replaceTag[tag].ToString(); else return closeBracket+tag; } [Obsolete("Has been superceded by Umbraco.Core.XmlHelper.GetAttributesFromElement")] public static Hashtable ReturnAttributes(String tag) { var h = new Hashtable(); foreach (var i in Umbraco.Core.XmlHelper.GetAttributesFromElement(tag)) { h.Add(i.Key, i.Value); } return h; } private static string returnLowerCaseAttributes(String tag) { string newTag = ""; MatchCollection m = Regex.Matches(tag, "(?\\S*)=\"(?[^\"]*)\"|(?\\S*)=(?[^( |>)]*)(>| )", RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace); foreach (System.Text.RegularExpressions.Match attributeSet in m) newTag += " " + attributeSet.Groups["attributeName"].Value.ToString().ToLower() + "=\"" + attributeSet.Groups["attributeValue"].Value.ToString() + "\""; return newTag; } // helper method gotten from: // http://stackoverflow.com/questions/20762/how-do-you-remove-invalid-hexadecimal-characters-from-an-xml-based-data-source-p#comment8130028_641632 internal static string RemoveIllegalXmlCharacters(string inString) { if (inString == null) return null; StringBuilder sbOutput = new StringBuilder(); char ch; for (int i = 0; i < inString.Length; i++) { ch = inString[i]; if ((ch >= 0x0020 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD) || ch == 0x0009 || ch == 0x000A || ch == 0x000D) { sbOutput.Append(ch); } } return sbOutput.ToString(); } } }