diff --git a/src/Umbraco.Core/XmlHelper.cs b/src/Umbraco.Core/XmlHelper.cs index 8a217855a7..4f3a63f5ea 100644 --- a/src/Umbraco.Core/XmlHelper.cs +++ b/src/Umbraco.Core/XmlHelper.cs @@ -17,7 +17,72 @@ namespace Umbraco.Core /// public class XmlHelper { - /// + /// + /// Gets a value indicating whether a specified string contains only xml whitespace characters. + /// + /// The string. + /// true if the string contains only xml whitespace characters. + /// As per XML 1.1 specs, space, \t, \r and \n. + public static bool IsXmlWhitespace(string s) + { + // as per xml 1.1 specs - anything else is significant whitespace + s = s.Trim(' ', '\t', '\r', '\n'); + return s.Length == 0; + } + + /// + /// Creates a new XPathDocument from an xml string. + /// + /// The xml string. + /// An XPathDocument created from the xml string. + public static XPathDocument CreateXPathDocument(string xml) + { + return new XPathDocument(new XmlTextReader(new StringReader(xml))); + } + + /// + /// Tries to create a new XPathDocument from an xml string. + /// + /// The xml string. + /// The XPath document. + /// A value indicating whether it has been possible to create the document. + public static bool TryCreateXPathDocument(string xml, out XPathDocument doc) + { + try + { + doc = new XPathDocument(new XmlTextReader(new StringReader(xml))); + return true; + } + catch (Exception) + { + doc = null; + return false; + } + } + + /// + /// Tries to create a new XPathDocument from a property value. + /// + /// The alias of the property. + /// The value of the property. + /// The XPath document. + /// A value indicating whether it has been possible to create the document. + public static bool TryCreateXPathDocumentFromPropertyValue(string alias, object value, out XPathDocument doc) + { + // In addition, DynamicNode strips dashes in elements or attributes + // names but really, this is ugly enough, and using dashes should be + // illegal in content type or property aliases anyway. 
+ + doc = null; + var xml = value as string; + if (xml == null) return false; + xml = xml.Trim(); + if (xml.StartsWith("<") == false || xml.EndsWith(">") == false || xml.Contains('/') == false) return false; + if (UmbracoSettings.NotDynamicXmlDocumentElements.Any(x => x.InvariantEquals(alias))) return false; + return TryCreateXPathDocument(xml, out doc); + } + + /// /// Sorts the children of the parentNode that match the xpath selector /// /// @@ -72,7 +137,7 @@ namespace Umbraco.Core } } } - + public static string StripDashesInElementOrAttributeNames(string xml) { using (var outputms = new MemoryStream()) @@ -126,6 +191,7 @@ namespace Umbraco.Core } } + /// /// Imports a XML node from text. /// @@ -222,11 +288,11 @@ namespace Umbraco.Core /// public static bool CouldItBeXml(string xml) { - if (!string.IsNullOrEmpty(xml)) + if (string.IsNullOrEmpty(xml) == false) { xml = xml.Trim(); - if (xml.StartsWith("<") && xml.EndsWith(">")) + if (xml.StartsWith("<") && xml.EndsWith(">") && xml.Contains("/")) { return true; } diff --git a/src/Umbraco.Web/DefaultPublishedMediaStore.cs b/src/Umbraco.Web/DefaultPublishedMediaStore.cs index 7155d80d34..ab84aa5f9a 100644 --- a/src/Umbraco.Web/DefaultPublishedMediaStore.cs +++ b/src/Umbraco.Web/DefaultPublishedMediaStore.cs @@ -293,9 +293,9 @@ namespace Umbraco.Web //ok it doesn't exist, we might assume now that Examine didn't index this property because the index is not set up correctly //so before we go loading this from the database, we can check if the alias exists on the content type at all, this information //is cached so will be quicker to look up. 
- if (dd.Properties.Any(x => x.Alias == "__NodeTypeAlias")) + if (dd.Properties.Any(x => x.Alias == UmbracoContentIndexer.NodeTypeAliasFieldName)) { - var aliasesAndNames = ContentType.GetAliasesAndNames(dd.Properties.First(x => x.Alias.InvariantEquals("__NodeTypeAlias")).Value.ToString()); + var aliasesAndNames = ContentType.GetAliasesAndNames(dd.Properties.First(x => x.Alias.InvariantEquals(UmbracoContentIndexer.NodeTypeAliasFieldName)).Value.ToString()); if (aliasesAndNames != null) { if (!aliasesAndNames.ContainsKey(alias)) @@ -317,7 +317,12 @@ namespace Umbraco.Web } } - return dd.Properties.FirstOrDefault(x => x.Alias.InvariantEquals(alias)); + //We've made it here which means that the value is stored in the Examine index. + //We are going to check for a special field however, that is because in some cases we store a 'Raw' + //value in the index such as for xml/html. + var rawValue = dd.Properties.FirstOrDefault(x => x.Alias.InvariantEquals("__Raw_" + alias)); + return rawValue + ?? 
dd.Properties.FirstOrDefault(x => x.Alias.InvariantEquals(alias)); } /// diff --git a/src/Umbraco.Web/umbraco.presentation/umbraco/Search/ExamineEvents.cs b/src/Umbraco.Web/umbraco.presentation/umbraco/Search/ExamineEvents.cs index 5eda0360c2..a4029ec6f3 100644 --- a/src/Umbraco.Web/umbraco.presentation/umbraco/Search/ExamineEvents.cs +++ b/src/Umbraco.Web/umbraco.presentation/umbraco/Search/ExamineEvents.cs @@ -1,5 +1,7 @@ using System.Linq; using Umbraco.Core; +using Examine.Providers; +using Umbraco.Core; using Umbraco.Core.Models; using Umbraco.Core.Services; using Umbraco.Web; @@ -15,6 +17,8 @@ namespace umbraco.presentation.umbraco.Search public class ExamineEvents : IApplicationEventHandler { + private const string RawFieldPrefix = "__Raw_"; + public void OnApplicationInitialized(UmbracoApplicationBase umbracoApplication, ApplicationContext applicationContext) { } @@ -51,12 +55,13 @@ namespace umbraco.presentation.umbraco.Search var contentIndexer = ExamineManager.Instance.IndexProviderCollection["InternalIndexer"] as UmbracoContentIndexer; if (contentIndexer != null) { - contentIndexer.DocumentWriting += indexer_DocumentWriting; + contentIndexer.GatheringNodeData += ContentIndexerGatheringNodeData; + contentIndexer.DocumentWriting += IndexerDocumentWriting; } var memberIndexer = ExamineManager.Instance.IndexProviderCollection["InternalMemberIndexer"] as UmbracoMemberIndexer; if (memberIndexer != null) { - memberIndexer.DocumentWriting += indexer_DocumentWriting; + memberIndexer.DocumentWriting += IndexerDocumentWriting; } } @@ -136,16 +141,64 @@ namespace umbraco.presentation.umbraco.Search } /// - /// Event handler to create a lower cased version of the node name, this is so we can support case-insensitive searching and still - /// use the Whitespace Analyzer + /// This checks if any user data might be xml/html, if so we will duplicate the field and store the raw value + /// so we can retrieve the raw value when required. 
/// /// /// - void indexer_DocumentWriting(object sender, Examine.LuceneEngine.DocumentWritingEventArgs e) + /// + /// This is regarding this issue: http://issues.umbraco.org/issue/U4-644 + /// The underlying UmbracoContentIndexer strips the HTML values before this event is even fired + /// so we need to check in the underlying 'node' document for the value. + /// + static void ContentIndexerGatheringNodeData(object sender, IndexingNodeDataEventArgs e) { - if (e.Fields.Keys.Contains("nodeName")) + var indexer = sender as UmbracoContentIndexer; + if (indexer == null) return; + + //loop through each field that is defined as a UserField for the index + foreach (var field in indexer.IndexerData.UserFields) { - //add the lower cased version + if (e.Fields.ContainsKey(field.Name)) + { + //get the original value from the node + var node = e.Node.Descendants(field.Name).FirstOrDefault(); + if (node == null) continue; + + //check if the node value has html + if (XmlHelper.CouldItBeXml(node.Value)) + { + //First save the raw value to a raw field, we will change the policy of this field by detecting the prefix later + e.Fields[RawFieldPrefix + field.Name] = node.Value; + } + } + } + + } + + /// + /// Event handler to create a lower cased version of the node name, this is so we can support case-insensitive searching and still + /// use the Whitespace Analyzer. This also ensures the 'Raw' values are added to the document. 
+ /// + /// + /// + static void IndexerDocumentWriting(object sender, Examine.LuceneEngine.DocumentWritingEventArgs e) + { + //This ensures that the special __Raw_ fields are indexed + var d = e.Document; + foreach (var f in e.Fields.Where(x => x.Key.StartsWith(RawFieldPrefix))) + { + d.Add(new Field( + f.Key, + f.Value, + Field.Store.YES, + Field.Index.NO, //don't index this field, we never want to search by it + Field.TermVector.NO)); + } + + //add the lower cased version + if (e.Fields.Keys.Contains("nodeName")) + { e.Document.Add(new Field("__nodeName", e.Fields["nodeName"].ToLower(), Field.Store.YES, diff --git a/src/umbraco.MacroEngines/RazorDynamicNode/DynamicNode.cs b/src/umbraco.MacroEngines/RazorDynamicNode/DynamicNode.cs index 19a3762e7d..348edab9aa 100644 --- a/src/umbraco.MacroEngines/RazorDynamicNode/DynamicNode.cs +++ b/src/umbraco.MacroEngines/RazorDynamicNode/DynamicNode.cs @@ -790,18 +790,18 @@ namespace umbraco.MacroEngines if (result != null) { //a really rough check to see if this may be valid xml - if (sResult.StartsWith("<") && sResult.EndsWith(">") && sResult.Contains("/")) + if (XmlHelper.CouldItBeXml(sResult)) { try { - XElement e = XElement.Parse(DynamicXml.StripDashesInElementOrAttributeNames(sResult), LoadOptions.None); + XElement e = XElement.Parse(XmlHelper.StripDashesInElementOrAttributeNames(sResult), LoadOptions.None); if (e != null) { //check that the document element is not one of the disallowed elements //allows RTE to still return as html if it's valid xhtml string documentElement = e.Name.LocalName; - if (!UmbracoSettings.NotDynamicXmlDocumentElements.Any(tag => - string.Equals(tag, documentElement, StringComparison.CurrentCultureIgnoreCase))) + if (UmbracoSettings.NotDynamicXmlDocumentElements.Any(tag => + string.Equals(tag, documentElement, StringComparison.CurrentCultureIgnoreCase)) == false) { result = new DynamicXml(e); return true; diff --git a/src/umbraco.MacroEngines/RazorDynamicNode/DynamicXml.cs 
b/src/umbraco.MacroEngines/RazorDynamicNode/DynamicXml.cs index 1eff680b30..a44d5e1288 100644 --- a/src/umbraco.MacroEngines/RazorDynamicNode/DynamicXml.cs +++ b/src/umbraco.MacroEngines/RazorDynamicNode/DynamicXml.cs @@ -8,6 +8,7 @@ using System.Xml.XPath; using System.Collections; using System.IO; using System.Web; +using Umbraco.Core; namespace umbraco.MacroEngines { @@ -381,9 +382,10 @@ namespace umbraco.MacroEngines return test(this) ? new HtmlString(valueIfTrue) : new HtmlString(valueIfFalse); } + [Obsolete("Use XmlHelper.StripDashesInElementOrAttributeNames instead")] public static string StripDashesInElementOrAttributeNames(string xml) { - return Umbraco.Core.Dynamics.DynamicXml.StripDashesInElementOrAttributeNames(xml); + return XmlHelper.StripDashesInElementOrAttributeNames(xml); } } } diff --git a/src/umbraco.MacroEngines/RazorDynamicNode/ExamineBackedMedia.cs b/src/umbraco.MacroEngines/RazorDynamicNode/ExamineBackedMedia.cs index 992b1c0969..0837445426 100644 --- a/src/umbraco.MacroEngines/RazorDynamicNode/ExamineBackedMedia.cs +++ b/src/umbraco.MacroEngines/RazorDynamicNode/ExamineBackedMedia.cs @@ -467,13 +467,18 @@ namespace umbraco.MacroEngines public IProperty GetProperty(string alias, out bool propertyExists) { string value = null; - if (Values.TryGetValue(alias, out value)) + + //First, try to get the 'raw' value, if that doesn't work try to get the normal one + if (Values.TryGetValue("__Raw_" + alias, out value) + || Values.TryGetValue(alias, out value)) { propertyExists = true; return new PropertyResult(alias, value, Guid.Empty); } + propertyExists = false; return null; } + } } \ No newline at end of file