From 0d11b15b3f716bdb90b75585a3cca2ba037dad55 Mon Sep 17 00:00:00 2001 From: Shannon Date: Wed, 4 May 2016 16:36:43 +0200 Subject: [PATCH] This fixes the LoadContentFromDatabase method - before this was storing every single xml node in memory in dictionaries (x2) and then performing the re-organization of all nodes afterwords to construct a document, now we just read each row and organize the document accordingly, --- .../umbraco.presentation/content.cs | 274 ++++-------------- 1 file changed, 59 insertions(+), 215 deletions(-) diff --git a/src/Umbraco.Web/umbraco.presentation/content.cs b/src/Umbraco.Web/umbraco.presentation/content.cs index f51686d21d..bdd8c91f18 100644 --- a/src/Umbraco.Web/umbraco.presentation/content.cs +++ b/src/Umbraco.Web/umbraco.presentation/content.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.ComponentModel; using System.Globalization; using System.IO; using System.Text; @@ -7,6 +8,8 @@ using System.Threading; using System.Threading.Tasks; using System.Web; using System.Xml; +using System.Xml.Linq; +using System.Xml.XPath; using umbraco.BusinessLogic; using umbraco.cms.businesslogic; using umbraco.cms.businesslogic.web; @@ -243,10 +246,6 @@ namespace umbraco /// The parent node identifier. public void SortNodes(int parentId) { - var childNodesXPath = UmbracoConfig.For.UmbracoSettings().Content.UseLegacyXmlSchema - ? "./node" - : "./* [@id]"; - using (var safeXml = GetSafeXmlWriter(false)) { var parentNode = parentId == -1 @@ -257,7 +256,7 @@ namespace umbraco var sorted = XmlHelper.SortNodesIfNeeded( parentNode, - childNodesXPath, + ChildNodesXPath, x => x.AttributeValue("sortOrder")); if (sorted == false) return; @@ -489,119 +488,71 @@ namespace umbraco { // Try to log to the DB LogHelper.Info("Loading content from database..."); - - var hierarchy = new Dictionary>(); - var nodeIndex = new Dictionary(); - + try { LogHelper.Debug("Republishing starting"); lock (DbReadSyncLock) { + //TODO: This is what we should do , but converting to use XDocument would be breaking unless we convert + // to XmlDocument at the end of this, but again, this would be bad for memory... though still not nearly as + // bad as what is happening before! + // We'll keep using XmlDocument for now though, but XDocument xml generation is much faster: + // https://blogs.msdn.microsoft.com/codejunkie/2008/10/08/xmldocument-vs-xelement-performance/ + // I think we already have code in here to convert XDocument to XmlDocument but in case we don't here + // it is: https://blogs.msdn.microsoft.com/marcelolr/2009/03/13/fast-way-to-convert-xmldocument-into-xdocument/ - // Lets cache the DTD to save on the DB hit on the subsequent use - string dtd = DocumentType.GenerateDtd(); + //// Prepare an XmlDocument with an appropriate inline DTD to match + //// the expected content + //var parent = new XElement("root", new XAttribute("id", "-1")); + //var xmlDoc = new XDocument( + // new XDocumentType("root", null, null, DocumentType.GenerateDtd()), + // parent); - // Prepare an XmlDocument with an appropriate inline DTD to match - // the expected content var xmlDoc = new XmlDocument(); - InitializeXml(xmlDoc, dtd); + var doctype = xmlDoc.CreateDocumentType("root", null, null, + ApplicationContext.Current.Services.ContentTypeService.GetContentTypesDtd()); + xmlDoc.AppendChild(doctype); + var parent = xmlDoc.CreateElement("root"); + var pIdAtt = xmlDoc.CreateAttribute("id"); + pIdAtt.Value = "-1"; + parent.Attributes.Append(pIdAtt); + xmlDoc.AppendChild(parent); // Esben Carlsen: At some point we really need to put all data access into to a tier of its own. // CLN - added checks that document xml is for a document that is actually published. - string sql = - @"select umbracoNode.id, umbracoNode.parentId, umbracoNode.sortOrder, cmsContentXml.xml from umbracoNode + const string sql = @"select umbracoNode.id, umbracoNode.parentID, umbracoNode.sortOrder, cmsContentXml.xml, umbracoNode.level from umbracoNode inner join cmsContentXml on cmsContentXml.nodeId = umbracoNode.id and umbracoNode.nodeObjectType = @type where umbracoNode.id in (select cmsDocument.nodeId from cmsDocument where cmsDocument.published = 1) -order by umbracoNode.level, umbracoNode.sortOrder"; +order by umbracoNode.level, umbracoNode.parentID, umbracoNode.sortOrder"; - - - using ( - IRecordsReader dr = SqlHelper.ExecuteReader(sql, - SqlHelper.CreateParameter("@type", - new Guid( - Constants.ObjectTypes.Document))) - ) + XmlElement last = null; + var db = ApplicationContext.Current.DatabaseContext.Database; + //NOTE: Query creates a reader - does not load all into memory + foreach (var row in db.Query(sql, new { type = new Guid(Constants.ObjectTypes.Document)})) { - while (dr.Read()) + string parentId = ((int)row.parentID).ToInvariantString(); + string xml = row.xml; + + //if the parentid is changing + if (last != null && last.GetAttribute("parentID") != parentId) { - int currentId = dr.GetInt("id"); - int parentId = dr.GetInt("parentId"); - string xml = dr.GetString("xml"); - - // fix sortOrder - see notes in UpdateSortOrder - var tmp = new XmlDocument(); - tmp.LoadXml(xml); - var attr = tmp.DocumentElement.GetAttributeNode("sortOrder"); - attr.Value = dr.GetInt("sortOrder").ToString(); - xml = tmp.InnerXml; - - // Call the eventhandler to allow modification of the string - var e1 = new ContentCacheLoadNodeEventArgs(); - FireAfterContentCacheDatabaseLoadXmlString(ref xml, e1); - // check if a listener has canceled the event - if (!e1.Cancel) - { - // and parse it into a DOM node - xmlDoc.LoadXml(xml); - XmlNode node = xmlDoc.FirstChild; - // same event handler loader form the xml node - var e2 = new ContentCacheLoadNodeEventArgs(); - FireAfterContentCacheLoadNodeFromDatabase(node, e2); - // and checking if it was canceled again - if (!e1.Cancel) - { - nodeIndex.Add(currentId, node); - - // verify if either of the handlers canceled the children to load - if (!e1.CancelChildren && !e2.CancelChildren) - { - // Build the content hierarchy - List children; - if (!hierarchy.TryGetValue(parentId, out children)) - { - // No children for this parent, so add one - children = new List(); - hierarchy.Add(parentId, children); - } - children.Add(currentId); - } - } - } + parent = xmlDoc.GetElementById(parentId); + if (parent == null) throw new InvalidOperationException("No parent node found in xml doc with id " + parentId); } + + var xmlDocFragment = xmlDoc.CreateDocumentFragment(); + xmlDocFragment.InnerXml = xml; + + last = (XmlElement)parent.AppendChild(xmlDocFragment); } - LogHelper.Debug("Xml Pages loaded"); + LogHelper.Debug("Done republishing Xml Index"); - try - { - // If we got to here we must have successfully retrieved the content from the DB so - // we can safely initialise and compose the final content DOM. - // Note: We are reusing the XmlDocument used to create the xml nodes above so - // we don't have to import them into a new XmlDocument - - // Initialise the document ready for the final composition of content - InitializeXml(xmlDoc, dtd); - - // Start building the content tree recursively from the root (-1) node - GenerateXmlDocument(hierarchy, nodeIndex, -1, xmlDoc.DocumentElement); - - LogHelper.Debug("Done republishing Xml Index"); - - return xmlDoc; - } - catch (Exception ee) - { - LogHelper.Error("Error while generating XmlDocument from database", ee); - } + return xmlDoc; } - } - catch (OutOfMemoryException ee) - { - LogHelper.Error(string.Format("Error Republishing: Out Of Memory. Parents: {0}, Nodes: {1}", hierarchy.Count, nodeIndex.Count), ee); - } + } catch (Exception ee) { LogHelper.Error("Error Republishing", ee); @@ -617,64 +568,11 @@ order by umbracoNode.level, umbracoNode.sortOrder"; return null; } - private static void GenerateXmlDocument(IDictionary> hierarchy, - IDictionary nodeIndex, int parentId, XmlNode parentNode) - { - List children; - - if (hierarchy.TryGetValue(parentId, out children)) - { - XmlNode childContainer = UmbracoConfig.For.UmbracoSettings().Content.UseLegacyXmlSchema || - String.IsNullOrEmpty(UmbracoSettings.TEMP_FRIENDLY_XML_CHILD_CONTAINER_NODENAME) - ? parentNode - : parentNode.SelectSingleNode( - UmbracoSettings.TEMP_FRIENDLY_XML_CHILD_CONTAINER_NODENAME); - - if (!UmbracoConfig.For.UmbracoSettings().Content.UseLegacyXmlSchema && - !String.IsNullOrEmpty(UmbracoSettings.TEMP_FRIENDLY_XML_CHILD_CONTAINER_NODENAME)) - { - if (childContainer == null) - { - childContainer = xmlHelper.addTextNode(parentNode.OwnerDocument, - UmbracoSettings. - TEMP_FRIENDLY_XML_CHILD_CONTAINER_NODENAME, ""); - parentNode.AppendChild(childContainer); - } - } - - foreach (int childId in children) - { - XmlNode childNode = nodeIndex[childId]; - - if (UmbracoConfig.For.UmbracoSettings().Content.UseLegacyXmlSchema || - String.IsNullOrEmpty(UmbracoSettings.TEMP_FRIENDLY_XML_CHILD_CONTAINER_NODENAME)) - { - parentNode.AppendChild(childNode); - } - else - { - childContainer.AppendChild(childNode); - } - - // Recursively build the content tree under the current child - GenerateXmlDocument(hierarchy, nodeIndex, childId, childNode); - } - } - } - [Obsolete("This method should not be used and does nothing, xml file persistence is done in a queue using a BackgroundTaskRunner")] public void PersistXmlToFile() { } - - /// - /// Adds a task to the xml cache file persister - /// - //private void QueueXmlForPersistence() - //{ - // _persisterTask = _persisterTask.Touch(); - //} - + internal DateTime GetCacheFileUpdateTime() { //TODO: Should there be a try/catch here in case the file is being written to while this is trying to be executed? @@ -723,31 +621,7 @@ order by umbracoNode.level, umbracoNode.sortOrder"; private static bool UseLegacySchema { get { return UmbracoConfig.For.UmbracoSettings().Content.UseLegacyXmlSchema; } - } - - // whether to keep version of everything (incl. medias & members) in cmsPreviewXml - // for audit purposes - false by default, not in umbracoSettings.config - // whether to... no idea what that one does - // it is false by default and not in UmbracoSettings.config anymore - ignoring - /* - private static bool GlobalPreviewStorageEnabled - { - get { return UmbracoConfig.For.UmbracoSettings().Content.GlobalPreviewStorageEnabled; } - } - */ - - // ensures config is valid - private void EnsureConfigurationIsValid() - { - if (SyncToXmlFile && SyncFromXmlFile) - throw new Exception("Cannot run with both ContinouslyUpdateXmlDiskCache and XmlContentCheckForDiskChanges being true."); - - if (XmlIsImmutable == false) - //LogHelper.Warn("Running with CloneXmlContent being false is a bad idea."); - LogHelper.Warn("CloneXmlContent is false - ignored, we always clone."); - - // note: if SyncFromXmlFile then we should also disable / warn that local edits are going to cause issues... - } + } #endregion @@ -842,13 +716,6 @@ order by umbracoNode.level, umbracoNode.sortOrder"; return xml2; } - private static void InitializeXml(XmlDocument xml, string dtd) - { - // prime the xml document with an inline dtd and a root element - xml.LoadXml(String.Format("{0}{1}{0}", - Environment.NewLine, dtd)); - } - // try to load from file, otherwise database // assumes xml lock (file is always locked) private void LoadXmlLocked(SafeXmlReaderWriter safeXml, out bool registerXmlChange) @@ -966,7 +833,7 @@ order by umbracoNode.level, umbracoNode.sortOrder"; _releaser = null; } } - + private static string ChildNodesXPath { get @@ -1264,6 +1131,11 @@ order by umbracoNode.level, umbracoNode.sortOrder"; publishedNode.Attributes.RemoveAll(); // remove all data nodes from the published node + //TODO: This could be faster, might as well just iterate all children and filter + // instead of selecting matching children (i.e. iterating all) and then iterating the + // filtered items to remove, this also allocates more memory to store the list of children. + // Below we also then do another filtering of child nodes, if we just iterate all children we + // can perform both functions more efficiently var dataNodes = publishedNode.SelectNodes(DataNodesXPath); if (dataNodes == null) throw new Exception("oops"); foreach (XmlNode n in dataNodes) @@ -1417,24 +1289,10 @@ order by umbracoNode.level, umbracoNode.sortOrder"; } } - /// - /// Occurs when [after loading the xml string from the database]. - /// + [Obsolete("This is no used, do not use this for any reason")] + [EditorBrowsable(EditorBrowsableState.Never)] public static event ContentCacheDatabaseLoadXmlStringEventHandler AfterContentCacheDatabaseLoadXmlString; - /// - /// Fires the before when creating the document cache from database - /// - /// The sender. - /// The instance containing the event data. - internal static void FireAfterContentCacheDatabaseLoadXmlString(ref string xml, ContentCacheLoadNodeEventArgs e) - { - if (AfterContentCacheDatabaseLoadXmlString != null) - { - AfterContentCacheDatabaseLoadXmlString(ref xml, e); - } - } - /// /// Occurs when [before when creating the document cache from database]. /// @@ -1453,24 +1311,10 @@ order by umbracoNode.level, umbracoNode.sortOrder"; } } - /// - /// Occurs when [after loading document cache xml node from database]. - /// + [Obsolete("This is no used, do not use this for any reason")] + [EditorBrowsable(EditorBrowsableState.Never)] public static event ContentCacheLoadNodeEventHandler AfterContentCacheLoadNodeFromDatabase; - /// - /// Fires the after loading document cache xml node from database - /// - /// The sender. - /// The instance containing the event data. - internal static void FireAfterContentCacheLoadNodeFromDatabase(XmlNode node, ContentCacheLoadNodeEventArgs e) - { - if (AfterContentCacheLoadNodeFromDatabase != null) - { - AfterContentCacheLoadNodeFromDatabase(node, e); - } - } - /// /// Occurs when [before a publish action updates the content cache]. ///