From ca99319eadd83ea31bb9b2a72923513d97582939 Mon Sep 17 00:00:00 2001 From: Shannon Deminick Date: Mon, 6 May 2013 15:09:24 -1000 Subject: [PATCH] Fixes: #U4-644 - DynamicNode and MVC don't output RTE values for media --- src/Umbraco.Core/Dynamics/DynamicXml.cs | 51 +------ src/Umbraco.Core/XmlHelper.cs | 140 ++++++++++++++++-- src/Umbraco.Web/DefaultPublishedMediaStore.cs | 14 +- .../umbraco/Search/ExamineEvents.cs | 67 ++++++++- .../RazorDynamicNode/DynamicNode.cs | 8 +- .../RazorDynamicNode/DynamicXml.cs | 4 +- .../RazorDynamicNode/ExamineBackedMedia.cs | 7 +- 7 files changed, 215 insertions(+), 76 deletions(-) diff --git a/src/Umbraco.Core/Dynamics/DynamicXml.cs b/src/Umbraco.Core/Dynamics/DynamicXml.cs index f75ed76de5..4dda5224a4 100644 --- a/src/Umbraco.Core/Dynamics/DynamicXml.cs +++ b/src/Umbraco.Core/Dynamics/DynamicXml.cs @@ -679,57 +679,10 @@ namespace Umbraco.Core.Dynamics return test(this) ? new HtmlString(valueIfTrue) : new HtmlString(valueIfFalse); } + [Obsolete("Use XmlHelper.StripDashesInElementOrAttributeNames instead")] public static string StripDashesInElementOrAttributeNames(string xml) { - using (MemoryStream outputms = new MemoryStream()) - { - using (TextWriter outputtw = new StreamWriter(outputms)) - { - using (MemoryStream ms = new MemoryStream()) - { - using (TextWriter tw = new StreamWriter(ms)) - { - tw.Write(xml); - tw.Flush(); - ms.Position = 0; - using (TextReader tr = new StreamReader(ms)) - { - bool IsInsideElement = false, IsInsideQuotes = false; - int ic = 0; - while ((ic = tr.Read()) != -1) - { - if (ic == (int)'<' && !IsInsideQuotes) - { - if (tr.Peek() != (int)'!') - { - IsInsideElement = true; - } - } - if (ic == (int)'>' && !IsInsideQuotes) - { - IsInsideElement = false; - } - if (ic == (int)'"') - { - IsInsideQuotes = !IsInsideQuotes; - } - if (!IsInsideElement || ic != (int)'-' || IsInsideQuotes) - { - outputtw.Write((char)ic); - } - } - - } - } - } - outputtw.Flush(); - outputms.Position = 0; - using (TextReader 
outputtr = new StreamReader(outputms)) - { - return outputtr.ReadToEnd(); - } - } - } + return XmlHelper.StripDashesInElementOrAttributeNames(xml); } diff --git a/src/Umbraco.Core/XmlHelper.cs b/src/Umbraco.Core/XmlHelper.cs index d416a37b58..60b3d85777 100644 --- a/src/Umbraco.Core/XmlHelper.cs +++ b/src/Umbraco.Core/XmlHelper.cs @@ -1,6 +1,7 @@ using System; using System.Collections; using System.Collections.Generic; +using System.IO; using System.Linq; using System.Text.RegularExpressions; using System.Xml; @@ -16,7 +17,72 @@ namespace Umbraco.Core /// public class XmlHelper { - /// + /// + /// Gets a value indicating whether a specified string contains only xml whitespace characters. + /// + /// The string. + /// true if the string contains only xml whitespace characters. + /// As per XML 1.1 specs, space, \t, \r and \n. + public static bool IsXmlWhitespace(string s) + { + // as per xml 1.1 specs - anything else is significant whitespace + s = s.Trim(' ', '\t', '\r', '\n'); + return s.Length == 0; + } + + /// + /// Creates a new XPathDocument from an xml string. + /// + /// The xml string. + /// An XPathDocument created from the xml string. + public static XPathDocument CreateXPathDocument(string xml) + { + return new XPathDocument(new XmlTextReader(new StringReader(xml))); + } + + /// + /// Tries to create a new XPathDocument from an xml string. + /// + /// The xml string. + /// The XPath document. + /// A value indicating whether it has been possible to create the document. + public static bool TryCreateXPathDocument(string xml, out XPathDocument doc) + { + try + { + doc = new XPathDocument(new XmlTextReader(new StringReader(xml))); + return true; + } + catch (Exception) + { + doc = null; + return false; + } + } + + /// + /// Tries to create a new XPathDocument from a property value. + /// + /// The alias of the property. + /// The value of the property. + /// The XPath document. + /// A value indicating whether it has been possible to create the document. 
+ public static bool TryCreateXPathDocumentFromPropertyValue(string alias, object value, out XPathDocument doc) + { + // In addition, DynamicNode strips dashes in elements or attributes + // names but really, this is ugly enough, and using dashes should be + // illegal in content type or property aliases anyway. + + doc = null; + var xml = value as string; + if (xml == null) return false; + xml = xml.Trim(); + if (xml.StartsWith("<") == false || xml.EndsWith(">") == false || xml.Contains('/') == false) return false; + if (UmbracoSettings.NotDynamicXmlDocumentElements.Any(x => x.InvariantEquals(alias))) return false; + return TryCreateXPathDocument(xml, out doc); + } + + /// /// Sorts the children of the parentNode that match the xpath selector /// /// @@ -71,6 +137,60 @@ namespace Umbraco.Core } } } + + + public static string StripDashesInElementOrAttributeNames(string xml) + { + using (var outputms = new MemoryStream()) + { + using (TextWriter outputtw = new StreamWriter(outputms)) + { + using (var ms = new MemoryStream()) + { + using (var tw = new StreamWriter(ms)) + { + tw.Write(xml); + tw.Flush(); + ms.Position = 0; + using (var tr = new StreamReader(ms)) + { + bool IsInsideElement = false, IsInsideQuotes = false; + int ic = 0; + while ((ic = tr.Read()) != -1) + { + if (ic == (int)'<' && !IsInsideQuotes) + { + if (tr.Peek() != (int)'!') + { + IsInsideElement = true; + } + } + if (ic == (int)'>' && !IsInsideQuotes) + { + IsInsideElement = false; + } + if (ic == (int)'"') + { + IsInsideQuotes = !IsInsideQuotes; + } + if (!IsInsideElement || ic != (int)'-' || IsInsideQuotes) + { + outputtw.Write((char)ic); + } + } + + } + } + } + outputtw.Flush(); + outputms.Position = 0; + using (TextReader outputtr = new StreamReader(outputms)) + { + return outputtr.ReadToEnd(); + } + } + } + } /// /// Imports a XML node from text. @@ -78,7 +198,7 @@ namespace Umbraco.Core /// The text. /// The XML doc. 
/// - internal static XmlNode ImportXmlNodeFromText(string text, ref XmlDocument xmlDoc) + public static XmlNode ImportXmlNodeFromText(string text, ref XmlDocument xmlDoc) { xmlDoc.LoadXml(text); return xmlDoc.FirstChild; @@ -138,7 +258,7 @@ namespace Umbraco.Core /// The node name. /// The node value. /// A XmlNode - public static XmlNode AddCDataNode(XmlDocument xd, string name, string value) + public static XmlNode AddCDataNode(XmlDocument xd, string name, string value) { var temp = xd.CreateNode(XmlNodeType.Element, name, ""); temp.AppendChild(xd.CreateCDataSection(value)); @@ -150,7 +270,7 @@ namespace Umbraco.Core /// /// The XmlNode. /// the value as a string - internal static string GetNodeValue(XmlNode n) + public static string GetNodeValue(XmlNode n) { var value = string.Empty; if (n == null || n.FirstChild == null) @@ -166,13 +286,13 @@ namespace Umbraco.Core /// /// true if the specified string appears to be XML; otherwise, false. /// - internal static bool CouldItBeXml(string xml) + public static bool CouldItBeXml(string xml) { - if (!string.IsNullOrEmpty(xml)) + if (string.IsNullOrEmpty(xml) == false) { xml = xml.Trim(); - if (xml.StartsWith("<") && xml.EndsWith(">")) + if (xml.StartsWith("<") && xml.EndsWith(">") && xml.Contains("/")) { return true; } @@ -189,7 +309,7 @@ namespace Umbraco.Core /// Name of the root. /// Name of the element. /// Returns an System.Xml.XmlDocument representation of the delimited string data. - internal static XmlDocument Split(string data, string[] separator, string rootName, string elementName) + public static XmlDocument Split(string data, string[] separator, string rootName, string elementName) { return Split(new XmlDocument(), data, separator, rootName, elementName); } @@ -203,7 +323,7 @@ namespace Umbraco.Core /// Name of the root node. /// Name of the element node. /// Returns an System.Xml.XmlDocument representation of the delimited string data. 
- internal static XmlDocument Split(XmlDocument xml, string data, string[] separator, string rootName, string elementName) + public static XmlDocument Split(XmlDocument xml, string data, string[] separator, string rootName, string elementName) { // load new XML document. xml.LoadXml(string.Concat("<", rootName, "/>")); @@ -232,7 +352,7 @@ namespace Umbraco.Core /// /// /// - internal static Dictionary GetAttributesFromElement(string tag) + public static Dictionary GetAttributesFromElement(string tag) { var m = Regex.Matches(tag, "(?\\S*)=\"(?[^\"]*)\"", diff --git a/src/Umbraco.Web/DefaultPublishedMediaStore.cs b/src/Umbraco.Web/DefaultPublishedMediaStore.cs index d059574397..60dac92742 100644 --- a/src/Umbraco.Web/DefaultPublishedMediaStore.cs +++ b/src/Umbraco.Web/DefaultPublishedMediaStore.cs @@ -11,6 +11,7 @@ using Umbraco.Core; using Umbraco.Core.Dynamics; using Umbraco.Core.Logging; using Umbraco.Core.Models; +using UmbracoExamine; using umbraco; using umbraco.cms.businesslogic; using Examine.LuceneEngine.SearchCriteria; @@ -286,9 +287,9 @@ namespace Umbraco.Web //ok it doesn't exist, we might assume now that Examine didn't index this property because the index is not set up correctly //so before we go loading this from the database, we can check if the alias exists on the content type at all, this information //is cached so will be quicker to look up. 
- if (dd.Properties.Any(x => x.Alias == "__NodeTypeAlias")) + if (dd.Properties.Any(x => x.Alias == UmbracoContentIndexer.NodeTypeAliasFieldName)) { - var aliasesAndNames = ContentType.GetAliasesAndNames(dd.Properties.First(x => x.Alias.InvariantEquals("__NodeTypeAlias")).Value.ToString()); + var aliasesAndNames = ContentType.GetAliasesAndNames(dd.Properties.First(x => x.Alias.InvariantEquals(UmbracoContentIndexer.NodeTypeAliasFieldName)).Value.ToString()); if (aliasesAndNames != null) { if (!aliasesAndNames.ContainsKey(alias)) @@ -310,7 +311,12 @@ namespace Umbraco.Web } } - return dd.Properties.FirstOrDefault(x => x.Alias.InvariantEquals(alias)); + //We've made it here which means that the value is stored in the Examine index. + //We are going to check for a special field however, that is because in some cases we store a 'Raw' + //value in the index such as for xml/html. + var rawValue = dd.Properties.FirstOrDefault(x => x.Alias.InvariantEquals("__Raw_" + alias)); + return rawValue + ?? 
dd.Properties.FirstOrDefault(x => x.Alias.InvariantEquals(alias)); } /// @@ -435,7 +441,7 @@ namespace Umbraco.Web ValidateAndSetProperty(valueDictionary, val => SortOrder = int.Parse(val), "sortOrder"); ValidateAndSetProperty(valueDictionary, val => Name = val, "nodeName", "__nodeName"); ValidateAndSetProperty(valueDictionary, val => UrlName = val, "urlName"); - ValidateAndSetProperty(valueDictionary, val => DocumentTypeAlias = val, "nodeTypeAlias", "__NodeTypeAlias"); + ValidateAndSetProperty(valueDictionary, val => DocumentTypeAlias = val, "nodeTypeAlias", UmbracoContentIndexer.NodeTypeAliasFieldName); ValidateAndSetProperty(valueDictionary, val => DocumentTypeId = int.Parse(val), "nodeType"); ValidateAndSetProperty(valueDictionary, val => WriterName = val, "writerName"); ValidateAndSetProperty(valueDictionary, val => CreatorName = val, "creatorName", "writerName"); //this is a bit of a hack fix for: U4-1132 diff --git a/src/Umbraco.Web/umbraco.presentation/umbraco/Search/ExamineEvents.cs b/src/Umbraco.Web/umbraco.presentation/umbraco/Search/ExamineEvents.cs index 1d30d3eb88..f0358f2f34 100644 --- a/src/Umbraco.Web/umbraco.presentation/umbraco/Search/ExamineEvents.cs +++ b/src/Umbraco.Web/umbraco.presentation/umbraco/Search/ExamineEvents.cs @@ -2,6 +2,8 @@ using System.Collections.Generic; using System.Linq; using System.Web; +using Examine.Providers; +using Umbraco.Core; using umbraco.BusinessLogic; using Examine; using UmbracoExamine; @@ -17,32 +19,83 @@ namespace umbraco.presentation.umbraco.Search public class ExamineEvents : IApplicationStartupHandler { + private const string RawFieldPrefix = "__Raw_"; + public ExamineEvents() : base() { var contentIndexer = ExamineManager.Instance.IndexProviderCollection["InternalIndexer"] as UmbracoContentIndexer; if (contentIndexer != null) { - contentIndexer.DocumentWriting += new EventHandler(indexer_DocumentWriting); + contentIndexer.GatheringNodeData += ContentIndexerGatheringNodeData; + 
contentIndexer.DocumentWriting += IndexerDocumentWriting; } var memberIndexer = ExamineManager.Instance.IndexProviderCollection["InternalMemberIndexer"] as UmbracoMemberIndexer; if (memberIndexer != null) { - memberIndexer.DocumentWriting += new EventHandler(indexer_DocumentWriting); + memberIndexer.DocumentWriting += IndexerDocumentWriting; } } /// - /// Event handler to create a lower cased version of the node name, this is so we can support case-insensitive searching and still - /// use the Whitespace Analyzer + /// This checks if any user data might be xml/html, if so we will duplicate the field and store the raw value + /// so we can retrieve the raw value when required. /// /// /// - void indexer_DocumentWriting(object sender, Examine.LuceneEngine.DocumentWritingEventArgs e) + /// + /// This is regarding this issue: http://issues.umbraco.org/issue/U4-644 + /// The underlying UmbracoContentIndexer strips the HTML values before this event is even fired + /// so we need to check in the underlying 'node' document for the value. 
+ /// + static void ContentIndexerGatheringNodeData(object sender, IndexingNodeDataEventArgs e) { - if (e.Fields.Keys.Contains("nodeName")) + var indexer = sender as UmbracoContentIndexer; + if (indexer == null) return; + + //loop through each field that is defined as a UserField for the index + foreach (var field in indexer.IndexerData.UserFields) { - //add the lower cased version + if (e.Fields.ContainsKey(field.Name)) + { + //get the original value from the node + var node = e.Node.Descendants(field.Name).FirstOrDefault(); + if (node == null) continue; + + //check if the node value has html + if (XmlHelper.CouldItBeXml(node.Value)) + { + //First save the raw value to a raw field, we will change the policy of this field by detecting the prefix later + e.Fields[RawFieldPrefix + field.Name] = node.Value; + } + } + } + + } + + /// + /// Event handler to create a lower cased version of the node name, this is so we can support case-insensitive searching and still + /// use the Whitespace Analyzer. This also ensures the 'Raw' values are added to the document. 
+ /// + /// + /// + static void IndexerDocumentWriting(object sender, Examine.LuceneEngine.DocumentWritingEventArgs e) + { + //This ensures that the special __Raw_ fields are indexed + var d = e.Document; + foreach (var f in e.Fields.Where(x => x.Key.StartsWith(RawFieldPrefix))) + { + d.Add(new Field( + f.Key, + f.Value, + Field.Store.YES, + Field.Index.NO, //don't index this field, we never want to search by it + Field.TermVector.NO)); + } + + //add the lower cased version + if (e.Fields.Keys.Contains("nodeName")) + { e.Document.Add(new Field("__nodeName", e.Fields["nodeName"].ToLower(), Field.Store.YES, diff --git a/src/umbraco.MacroEngines/RazorDynamicNode/DynamicNode.cs b/src/umbraco.MacroEngines/RazorDynamicNode/DynamicNode.cs index a638307d4d..f29c34e935 100644 --- a/src/umbraco.MacroEngines/RazorDynamicNode/DynamicNode.cs +++ b/src/umbraco.MacroEngines/RazorDynamicNode/DynamicNode.cs @@ -789,18 +789,18 @@ namespace umbraco.MacroEngines if (result != null) { //a really rough check to see if this may be valid xml - if (sResult.StartsWith("<") && sResult.EndsWith(">") && sResult.Contains("/")) + if (XmlHelper.CouldItBeXml(sResult)) { try { - XElement e = XElement.Parse(DynamicXml.StripDashesInElementOrAttributeNames(sResult), LoadOptions.None); + XElement e = XElement.Parse(XmlHelper.StripDashesInElementOrAttributeNames(sResult), LoadOptions.None); if (e != null) { //check that the document element is not one of the disallowed elements //allows RTE to still return as html if it's valid xhtml string documentElement = e.Name.LocalName; - if (!UmbracoSettings.NotDynamicXmlDocumentElements.Any(tag => - string.Equals(tag, documentElement, StringComparison.CurrentCultureIgnoreCase))) + if (UmbracoSettings.NotDynamicXmlDocumentElements.Any(tag => + string.Equals(tag, documentElement, StringComparison.CurrentCultureIgnoreCase)) == false) { result = new DynamicXml(e); return true; diff --git a/src/umbraco.MacroEngines/RazorDynamicNode/DynamicXml.cs 
b/src/umbraco.MacroEngines/RazorDynamicNode/DynamicXml.cs index 1eff680b30..a44d5e1288 100644 --- a/src/umbraco.MacroEngines/RazorDynamicNode/DynamicXml.cs +++ b/src/umbraco.MacroEngines/RazorDynamicNode/DynamicXml.cs @@ -8,6 +8,7 @@ using System.Xml.XPath; using System.Collections; using System.IO; using System.Web; +using Umbraco.Core; namespace umbraco.MacroEngines { @@ -381,9 +382,10 @@ namespace umbraco.MacroEngines return test(this) ? new HtmlString(valueIfTrue) : new HtmlString(valueIfFalse); } + [Obsolete("Use XmlHelper.StripDashesInElementOrAttributeNames instead")] public static string StripDashesInElementOrAttributeNames(string xml) { - return Umbraco.Core.Dynamics.DynamicXml.StripDashesInElementOrAttributeNames(xml); + return XmlHelper.StripDashesInElementOrAttributeNames(xml); } } } diff --git a/src/umbraco.MacroEngines/RazorDynamicNode/ExamineBackedMedia.cs b/src/umbraco.MacroEngines/RazorDynamicNode/ExamineBackedMedia.cs index 992b1c0969..0837445426 100644 --- a/src/umbraco.MacroEngines/RazorDynamicNode/ExamineBackedMedia.cs +++ b/src/umbraco.MacroEngines/RazorDynamicNode/ExamineBackedMedia.cs @@ -467,13 +467,18 @@ namespace umbraco.MacroEngines public IProperty GetProperty(string alias, out bool propertyExists) { string value = null; - if (Values.TryGetValue(alias, out value)) + + //First, try to get the 'raw' value, if that doesn't work try to get the normal one + if (Values.TryGetValue("__Raw_" + alias, out value) + || Values.TryGetValue(alias, out value)) { propertyExists = true; return new PropertyResult(alias, value, Guid.Empty); } + propertyExists = false; return null; } + } } \ No newline at end of file