U4-9121 - improve url perfs

2016-10-28 14:33:44 +02:00
parent 7495d89a79
commit c500f98ad8
6 changed files with 268 additions and 199 deletions
--- a/src/Umbraco.Web/PublishedCache/XmlPublishedCache/PublishedContentCache.cs
+++ b/src/Umbraco.Web/PublishedCache/XmlPublishedCache/PublishedContentCache.cs
@@ -16,7 +16,9 @@ using umbraco;
 using System.Linq;
 using umbraco.BusinessLogic;
 using umbraco.presentation.preview;
+using Umbraco.Core.Services;
 using GlobalSettings = umbraco.GlobalSettings;
+using Task = System.Threading.Tasks.Task;

 namespace Umbraco.Web.PublishedCache.XmlPublishedCache
 {
@@ -26,6 +28,13 @@ namespace Umbraco.Web.PublishedCache.XmlPublishedCache

        private readonly RoutesCache _routesCache = new RoutesCache(!UnitTesting);

+        private DomainHelper _domainHelper; // created by GetDomainHelper on first use, then reused
+
+        private DomainHelper GetDomainHelper(IDomainService domainService)
+        {
+            return _domainHelper ?? (_domainHelper = new DomainHelper(domainService)); // domainService is only used when no helper exists yet
+        }
+
        // for INTERNAL, UNIT TESTS use ONLY
        internal RoutesCache RoutesCache { get { return _routesCache; } }

@@ -99,6 +108,13 @@ namespace Umbraco.Web.PublishedCache.XmlPublishedCache
            // - non-colliding, adds one complete "by route" lookup, only on the first time a url is computed (then it's cached anyways)
            // - colliding, adds one "by route" lookup, the first time the url is computed, then one dictionary looked each time it is computed again
            // assuming no collisions, the impact is one complete "by route" lookup the first time each url is computed
+            //
+            // U4-9121 - this lookup is too expensive when computing a large number of urls on a front-end (e.g. a menu)
+            // ... we considered moving the lookup out of this path into its own async task: errors would no longer be
+            //     reported in the back-office, but at least the cache would not be polluted
+            // instead, DetermineIdByRoute was refactored to stop using XPath, a 16x improvement according to benchmarks
+            // open question: will that be enough?
+
            var loopId = preview ? 0 : _routesCache.GetNodeId(route); // might be cached already in case of collision
            if (loopId == 0)
            {
@@ -130,62 +146,141 @@ namespace Umbraco.Web.PublishedCache.XmlPublishedCache
            var pos = route.IndexOf('/');
            var path = pos == 0 ? route : route.Substring(pos);
            var startNodeId = pos == 0 ? 0 : int.Parse(route.Substring(0, pos));
-            IEnumerable<XPathVariable> vars;
-
-            var xpath = CreateXpathQuery(startNodeId, path, hideTopLevelNode, out vars);

            //check if we can find the node in our xml cache
-            var content = GetSingleByXPath(umbracoContext, preview, xpath, vars == null ? null : vars.ToArray());
+            var id = NavigateRoute(umbracoContext, preview, startNodeId, path, hideTopLevelNode);
+            if (id > 0) return GetById(umbracoContext, preview, id);

            // if hideTopLevelNodePath is true then for url /foo we looked for /*/foo
            // but maybe that was the url of a non-default top-level node, so we also
            // have to look for /foo (see note in ApplyHideTopLevelNodeFromPath).
-            if (content == null && hideTopLevelNode && path.Length > 1 && path.IndexOf('/', 1) < 0)
+            if (hideTopLevelNode && path.Length > 1 && path.IndexOf('/', 1) < 0)
            {
-                xpath = CreateXpathQuery(startNodeId, path, false, out vars);
-                content = GetSingleByXPath(umbracoContext, preview, xpath, vars == null ? null : vars.ToArray());
+                var id2 = NavigateRoute(umbracoContext, preview, startNodeId, path, false);
+                if (id2 > 0) return GetById(umbracoContext, preview, id2);
            }

-            return content;
+            return null;
+        }
+
+        // Resolves a route to a content id by walking the xml cache, without using XPath.
+        // startNodeId > 0 anchors the walk at that node (e.g. a domain root), otherwise at the xml root;
+        // hideTopLevelNode means the first url part is looked up below each top-level node in turn.
+        // Returns the id of the matching node, or -1 when nothing matches.
+        private int NavigateRoute(UmbracoContext umbracoContext, bool preview, int startNodeId, string path, bool hideTopLevelNode)
+        {
+            var xml = GetXml(umbracoContext, preview);
+            var anchored = startNodeId > 0;
+
+            // an anchored route must start at an existing node
+            var start = anchored
+                ? xml.GetElementById(startNodeId.ToString(CultureInfo.InvariantCulture))
+                : xml.DocumentElement; // null for an empty document
+            if (start == null) return -1;
+
+            // empty path
+            if (path == string.Empty || path == "/")
+            {
+                if (anchored) return startNodeId;
+
+                // the default page is the top-level node with the lowest sortOrder, and the
+                // first one in document order in case of a tie
+                XmlElement first = null;
+                var min = int.MaxValue;
+                // OfType skips non-element nodes (text, comments) that a direct cast would throw on
+                foreach (var e in start.ChildNodes.OfType<XmlElement>())
+                {
+                    var sortOrder = int.Parse(e.GetAttribute("sortOrder"), CultureInfo.InvariantCulture);
+                    if (first != null && sortOrder >= min) continue;
+                    min = sortOrder;
+                    first = e;
+                }
+                return first == null ? -1 : int.Parse(first.GetAttribute("id"), CultureInfo.InvariantCulture);
+            }
+
+            // non-empty path
+            var urlParts = path.Split(SlashChar, StringSplitOptions.RemoveEmptyEntries);
+            if (anchored || hideTopLevelNode == false)
+                return NavigateElementRoute(start, urlParts);
+
+            // top-level nodes are hidden from the url: try the route below each of them
+            foreach (var e in start.ChildNodes.OfType<XmlElement>())
+            {
+                var id = NavigateElementRoute(e, urlParts);
+                if (id > 0) return id;
+            }
+            return -1;
+        }
+
+        private static bool UseLegacySchema // value of the UseLegacyXmlSchema content setting
+        {
+            get { return UmbracoConfig.For.UmbracoSettings().Content.UseLegacyXmlSchema; }
+        }
+
+        // Walks down from elt, matching each url part against the urlName of a child document.
+        // Returns the id of the element reached, or -1 when a part has no matching child.
+        private int NavigateElementRoute(XmlElement elt, string[] urlParts)
+        {
+            var legacy = UseLegacySchema; // read once instead of once per child element
+            foreach (var urlPart in urlParts)
+            {
+                XmlElement match = null;
+                // OfType skips non-element nodes (text, comments) that a direct cast would throw on
+                foreach (var child in elt.ChildNodes.OfType<XmlElement>())
+                {
+                    // legacy schema: documents are <node>; current schema: they carry @isDoc
+                    var isDoc = legacy ? child.Name == "node" : child.GetAttributeNode("isDoc") != null;
+                    if (isDoc == false || child.GetAttribute("urlName") != urlPart) continue;
+                    match = child;
+                    break;
+                }
+                if (match == null) return -1;
+                elt = match;
+            }
+            // every part matched (or there were none): elt is the target element
+            return int.Parse(elt.GetAttribute("id"), CultureInfo.InvariantCulture);
         }

        string DetermineRouteById(UmbracoContext umbracoContext, bool preview, int contentId)
        {
-            var node = GetById(umbracoContext, preview, contentId);
-            if (node == null)
-                return null;
+            var elt = GetXml(umbracoContext, preview).GetElementById(contentId.ToString(CultureInfo.InvariantCulture));
+            if (elt == null) return null;

-            var domainHelper = new DomainHelper(umbracoContext.Application.Services.DomainService);
+            var domainHelper = GetDomainHelper(umbracoContext.Application.Services.DomainService);

            // walk up from that node until we hit a node with a domain,
            // or we reach the content root, collecting urls in the way
            var pathParts = new List<string>();
-            var n = node;
-            var hasDomains = domainHelper.NodeHasDomains(n.Id);
-            while (hasDomains == false && n != null) // n is null at root
+            var eltId = int.Parse(elt.GetAttribute("id"));
+            var eltParentId = int.Parse(((XmlElement) elt.ParentNode).GetAttribute("id"));
+            var e = elt;
+            var id = eltId;
+            var hasDomains = domainHelper.NodeHasDomains(id);
+            while (hasDomains == false && id != -1)
            {
                // get the url
-                var urlName = n.UrlName;
+                var urlName = e.GetAttribute("urlName");
                pathParts.Add(urlName);

                // move to parent node
-                n = n.Parent;
-                hasDomains = n != null && domainHelper.NodeHasDomains(n.Id);
+                e = (XmlElement) e.ParentNode;
+                id = int.Parse(e.GetAttribute("id"));
+                hasDomains = id != -1 && domainHelper.NodeHasDomains(id);
            }

            // no domain, respect HideTopLevelNodeFromPath for legacy purposes
-            if (hasDomains == false && global::umbraco.GlobalSettings.HideTopLevelNodeFromPath)
-                ApplyHideTopLevelNodeFromPath(umbracoContext, node, pathParts);
+            if (hasDomains == false && GlobalSettings.HideTopLevelNodeFromPath)
+                ApplyHideTopLevelNodeFromPath(umbracoContext, eltId, eltParentId, pathParts);

            // assemble the route
            pathParts.Reverse();
            var path = "/" + string.Join("/", pathParts); // will be "/" or "/foo" or "/foo/bar" etc
-            var route = (n == null ? "" : n.Id.ToString(CultureInfo.InvariantCulture)) + path;
+            var route = (id == -1 ? "" : id.ToString(CultureInfo.InvariantCulture)) + path;

            return route;
        }

-        static void ApplyHideTopLevelNodeFromPath(UmbracoContext umbracoContext, IPublishedContent node, IList<string> pathParts)
+        static void ApplyHideTopLevelNodeFromPath(UmbracoContext umbracoContext, int nodeId, int parentId, IList<string> pathParts)
        {
            // in theory if hideTopLevelNodeFromPath is true, then there should be only once
            // top-level node, or else domains should be assigned. but for backward compatibility
@@ -195,12 +290,12 @@ namespace Umbraco.Web.PublishedCache.XmlPublishedCache
            // "/foo" fails (looking for "/*/foo") we try also "/foo".
            // this does not make much sense anyway esp. if both "/foo/" and "/bar/foo" exist, but
            // that's the way it works pre-4.10 and we try to be backward compat for the time being
-            if (node.Parent == null)
+            if (parentId == -1)
            {
                var rootNode = umbracoContext.ContentCache.GetByRoute("/", true);
                if (rootNode == null)
                    throw new Exception("Failed to get node at /.");
-                if (rootNode.Id == node.Id) // remove only if we're the default node
+                if (rootNode.Id == nodeId) // remove only if we're the default node
                    pathParts.RemoveAt(pathParts.Count - 1);
            }
            else
@@ -217,12 +312,7 @@ namespace Umbraco.Web.PublishedCache.XmlPublishedCache
 		{
 			public int Version { get; private set; }

-			public static string Root { get { return "/root"; } }
 			public string RootDocuments { get; private set; }
-			public string DescendantDocumentById { get; private set; }
-			public string ChildDocumentByUrlName { get; private set; }
-            public string ChildDocumentByUrlNameVar { get; private set; }
-            public string RootDocumentWithLowestSortOrder { get; private set; }

 			public XPathStringsDefinition(int version)
 			{
@@ -233,19 +323,11 @@ namespace Umbraco.Web.PublishedCache.XmlPublishedCache
 					// legacy XML schema
 					case 0:
 						RootDocuments = "/root/node";
-						DescendantDocumentById = "//node [@id={0}]";
-						ChildDocumentByUrlName = "/node [@urlName='{0}']";
-						ChildDocumentByUrlNameVar = "/node [@urlName=${0}]";
-						RootDocumentWithLowestSortOrder = "/root/node [not(@sortOrder > ../node/@sortOrder)][1]";
 						break;

 					// default XML schema as of 4.10
 					case 1:
 						RootDocuments = "/root/* [@isDoc]";
-						DescendantDocumentById = "//* [@isDoc and @id={0}]";
-						ChildDocumentByUrlName = "/* [@isDoc and @urlName='{0}']";
-						ChildDocumentByUrlNameVar = "/* [@isDoc and @urlName=${0}]";
-						RootDocumentWithLowestSortOrder = "/root/* [@isDoc and not(@sortOrder > ../* [@isDoc]/@sortOrder)][1]";
 						break;

 					default:
@@ -421,84 +503,6 @@ namespace Umbraco.Web.PublishedCache.XmlPublishedCache

        static readonly char[] SlashChar = new[] { '/' };

-        protected string CreateXpathQuery(int startNodeId, string path, bool hideTopLevelNodeFromPath, out IEnumerable<XPathVariable> vars)
-        {
-            string xpath;
-            vars = null;
-
-            if (path == string.Empty || path == "/")
-            {
-                // if url is empty
-                if (startNodeId > 0)
-                {
-					// if in a domain then use the root node of the domain
-					xpath = string.Format(XPathStringsDefinition.Root + XPathStrings.DescendantDocumentById, startNodeId);
-                }
-                else
-                {
-                    // if not in a domain - what is the default page?
-                    // let's say it is the first one in the tree, if any -- order by sortOrder
-
-					// but!
-					// umbraco does not consistently guarantee that sortOrder starts with 0
-					// so the one that we want is the one with the smallest sortOrder
-					// read http://stackoverflow.com/questions/1128745/how-can-i-use-xpath-to-find-the-minimum-value-of-an-attribute-in-a-set-of-elemen
-
-					// so that one does not work, because min(@sortOrder) maybe 1
-					// xpath = "/root/*[@isDoc and @sortOrder='0']";
-
-					// and we can't use min() because that's XPath 2.0
-					// that one works
-					xpath = XPathStrings.RootDocumentWithLowestSortOrder;
-                }
-            }
-            else
-            {
-                // if url is not empty, then use it to try lookup a matching page
-                var urlParts = path.Split(SlashChar, StringSplitOptions.RemoveEmptyEntries);
-                var xpathBuilder = new StringBuilder();
-                int partsIndex = 0;
-                List<XPathVariable> varsList = null;
-
-                if (startNodeId == 0)
-                {
-					if (hideTopLevelNodeFromPath)
-						xpathBuilder.Append(XPathStrings.RootDocuments); // first node is not in the url
-					else
-						xpathBuilder.Append(XPathStringsDefinition.Root);
-                }
-                else
-                {
-					xpathBuilder.AppendFormat(XPathStringsDefinition.Root + XPathStrings.DescendantDocumentById, startNodeId);
-					// always "hide top level" when there's a domain
-                }
-
-                while (partsIndex < urlParts.Length)
-                {
-                    var part = urlParts[partsIndex++];
-                    if (part.Contains('\'') || part.Contains('"'))
-                    {
-                        // use vars, escaping gets ugly pretty quickly
-                        varsList = varsList ?? new List<XPathVariable>();
-                        var varName = string.Format("var{0}", partsIndex);
-                        varsList.Add(new XPathVariable(varName, part));
-                        xpathBuilder.AppendFormat(XPathStrings.ChildDocumentByUrlNameVar, varName);
-                    }
-                    else
-                    {
-                        xpathBuilder.AppendFormat(XPathStrings.ChildDocumentByUrlName, part);
-
-                    }
-                }
-
-                xpath = xpathBuilder.ToString();
-                if (varsList != null)
-                    vars = varsList.ToArray();
-            }
-
-            return xpath;
-        }
-
        #endregion

        #region Detached