From 8c1996a7a1242a9db7ae13066ee615d296b9e05c Mon Sep 17 00:00:00 2001 From: Robert Date: Thu, 31 Aug 2017 09:15:02 +0200 Subject: [PATCH 1/3] Changed from the String object qualifier to type qualifier --- src/Umbraco.Web/HtmlStringUtilities.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Umbraco.Web/HtmlStringUtilities.cs b/src/Umbraco.Web/HtmlStringUtilities.cs index de3d084e0b..e4d5b2a4e9 100644 --- a/src/Umbraco.Web/HtmlStringUtilities.cs +++ b/src/Umbraco.Web/HtmlStringUtilities.cs @@ -247,13 +247,13 @@ namespace Umbraco.Web outputms.Position = 0; using (TextReader outputtr = new StreamReader(outputms)) { - string result = String.Empty; + string result = string.Empty; string firstTrim = outputtr.ReadToEnd().Replace(" ", " ").Trim(); //Check to see if there is an empty char between the hellip and the output string //if there is, remove it - if (String.IsNullOrEmpty(firstTrim) == false) + if (string.IsNullOrWhiteSpace(firstTrim) == false) { result = firstTrim[firstTrim.Length - hellip.Length -1] == ' ' ? 
firstTrim.Remove(firstTrim.Length - hellip.Length -1, 1) : firstTrim; } From 73937172dce66255f289462e667befe20ef4874e Mon Sep 17 00:00:00 2001 From: Robert Date: Thu, 31 Aug 2017 11:43:06 +0200 Subject: [PATCH 2/3] Added check for invalid HTML and sanitization on said HTML Added HtmlSanitization package Added check for invalid HTML and sanitization on WordsToLength and Truncate methods Added some extra comments --- src/Umbraco.Core/Umbraco.Core.csproj | 3 ++ src/Umbraco.Core/packages.config | 1 + src/Umbraco.Web/HtmlStringUtilities.cs | 38 ++++++++++++++++++++------ src/Umbraco.Web/Umbraco.Web.csproj | 6 ++++ src/Umbraco.Web/packages.config | 2 ++ 5 files changed, 42 insertions(+), 8 deletions(-) diff --git a/src/Umbraco.Core/Umbraco.Core.csproj b/src/Umbraco.Core/Umbraco.Core.csproj index 24fdf32150..6a4305dec7 100644 --- a/src/Umbraco.Core/Umbraco.Core.csproj +++ b/src/Umbraco.Core/Umbraco.Core.csproj @@ -37,6 +37,9 @@ false + + ..\packages\AngleSharp.0.9.9\lib\net45\AngleSharp.dll + ..\packages\AutoMapper.3.3.1\lib\net40\AutoMapper.dll True diff --git a/src/Umbraco.Core/packages.config b/src/Umbraco.Core/packages.config index dd00d61ea6..3634d0d25a 100644 --- a/src/Umbraco.Core/packages.config +++ b/src/Umbraco.Core/packages.config @@ -1,5 +1,6 @@  + diff --git a/src/Umbraco.Web/HtmlStringUtilities.cs b/src/Umbraco.Web/HtmlStringUtilities.cs index e4d5b2a4e9..3acdff9c81 100644 --- a/src/Umbraco.Web/HtmlStringUtilities.cs +++ b/src/Umbraco.Web/HtmlStringUtilities.cs @@ -5,6 +5,7 @@ using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Web; +using Ganss.XSS; using HtmlAgilityPack; using Umbraco.Web.WebApi.Filters; @@ -89,6 +90,18 @@ namespace Umbraco.Web { string hellip = "…"; + HtmlDocument doc = new HtmlDocument(); + HtmlSanitizer sanitizer = new HtmlSanitizer(); + + doc.LoadHtml(html); + + //Check for invalid HTML + if (doc.ParseErrors.Any()) + { + //Sanitize invalid HTML, it will not be pretty, but it will be valid + html = 
sanitizer.Sanitize(html); + } + using (var outputms = new MemoryStream()) { using (var outputtw = new StreamWriter(outputms)) @@ -255,7 +268,7 @@ namespace Umbraco.Web //if there is, remove it if (string.IsNullOrWhiteSpace(firstTrim) == false) { - result = firstTrim[firstTrim.Length - hellip.Length -1] == ' ' ? firstTrim.Remove(firstTrim.Length - hellip.Length -1, 1) : firstTrim; + result = firstTrim[firstTrim.Length - hellip.Length - 1] == ' ' ? firstTrim.Remove(firstTrim.Length - hellip.Length - 1, 1) : firstTrim; } return new HtmlString(result); } @@ -272,10 +285,13 @@ namespace Umbraco.Web public int WordsToLength(string html, int words, bool tagsAsContent) { HtmlDocument doc = new HtmlDocument(); + HtmlSanitizer sanitizer = new HtmlSanitizer(); + + doc.LoadHtml(html); int wordCount = 0, length = 0, - insideTagCounter = length, + insideTagCounter = 0, maxWords = words; string strippedOfTags = string.Empty; @@ -283,14 +299,21 @@ namespace Umbraco.Web //If tagsAsContent is on, use the string stripped of html tags if (tagsAsContent == false) { - doc.LoadHtml(html); - foreach (var node in doc.DocumentNode.ChildNodes) { strippedOfTags += node.InnerText; } html = strippedOfTags; } + else + { + //Check for invalid HTML + if (doc.ParseErrors.Any()) + { + //Sanitize invalid HTML, it will not be pretty, but it will be valid + html = sanitizer.Sanitize(html); + } + } while (length < html.Length) { @@ -300,14 +323,13 @@ namespace Umbraco.Web while (length < html.Length && char.IsWhiteSpace(html[length]) == false) { //Check if we have a space inside a tag and increase the length if we do + //We ignore the end tag as it is added by the Truncate method if (html[length].Equals('<') && html[length + 1].Equals('/') == false && tagsAsContent) { + insideTagCounter = length; while (html[insideTagCounter].Equals('>') == false) { - if (html[insideTagCounter].Equals(' ')) - { - length++; - } + length++; insideTagCounter++; } } diff --git a/src/Umbraco.Web/Umbraco.Web.csproj 
b/src/Umbraco.Web/Umbraco.Web.csproj index 9aaab32af9..f942a35adf 100644 --- a/src/Umbraco.Web/Umbraco.Web.csproj +++ b/src/Umbraco.Web/Umbraco.Web.csproj @@ -97,6 +97,9 @@ {07fbc26b-2927-4a22-8d96-d644c667fecc} UmbracoExamine + + ..\packages\AngleSharp.0.9.9\lib\net45\AngleSharp.dll + ..\packages\AutoMapper.3.3.1\lib\net40\AutoMapper.dll True @@ -124,6 +127,9 @@ ..\packages\HtmlAgilityPack.1.4.9.5\lib\Net45\HtmlAgilityPack.dll True + + ..\packages\HtmlSanitizer.3.4.156\lib\net45\HtmlSanitizer.dll + ..\packages\SharpZipLib.0.86.0\lib\20\ICSharpCode.SharpZipLib.dll True diff --git a/src/Umbraco.Web/packages.config b/src/Umbraco.Web/packages.config index daef6ee7fd..ce25d7c26c 100644 --- a/src/Umbraco.Web/packages.config +++ b/src/Umbraco.Web/packages.config @@ -1,10 +1,12 @@  + + From 972d5f8296981397e04e0cd290d8a02b1ac1c677 Mon Sep 17 00:00:00 2001 From: Robert Date: Sun, 3 Sep 2017 21:17:53 +0200 Subject: [PATCH 3/3] Reverted back to last commit, invalid html check added --- src/Umbraco.Core/Umbraco.Core.csproj | 3 --- src/Umbraco.Core/packages.config | 1 - src/Umbraco.Web/HtmlStringUtilities.cs | 32 ++------------------------ src/Umbraco.Web/Umbraco.Web.csproj | 6 ----- src/Umbraco.Web/packages.config | 2 -- 5 files changed, 2 insertions(+), 42 deletions(-) diff --git a/src/Umbraco.Core/Umbraco.Core.csproj b/src/Umbraco.Core/Umbraco.Core.csproj index 6a4305dec7..24fdf32150 100644 --- a/src/Umbraco.Core/Umbraco.Core.csproj +++ b/src/Umbraco.Core/Umbraco.Core.csproj @@ -37,9 +37,6 @@ false - - ..\packages\AngleSharp.0.9.9\lib\net45\AngleSharp.dll - ..\packages\AutoMapper.3.3.1\lib\net40\AutoMapper.dll True diff --git a/src/Umbraco.Core/packages.config b/src/Umbraco.Core/packages.config index 3634d0d25a..dd00d61ea6 100644 --- a/src/Umbraco.Core/packages.config +++ b/src/Umbraco.Core/packages.config @@ -1,6 +1,5 @@  - diff --git a/src/Umbraco.Web/HtmlStringUtilities.cs b/src/Umbraco.Web/HtmlStringUtilities.cs index 3acdff9c81..f9adfc5175 100644 --- 
a/src/Umbraco.Web/HtmlStringUtilities.cs +++ b/src/Umbraco.Web/HtmlStringUtilities.cs @@ -5,7 +5,6 @@ using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Web; -using Ganss.XSS; using HtmlAgilityPack; using Umbraco.Web.WebApi.Filters; @@ -90,18 +89,6 @@ namespace Umbraco.Web { string hellip = "…"; - HtmlDocument doc = new HtmlDocument(); - HtmlSanitizer sanitizer = new HtmlSanitizer(); - - doc.LoadHtml(html); - - //Check for invalid HTML - if (doc.ParseErrors.Any()) - { - //Sanitize invalid HTML, it will not be pretty, but it will be valid - html = sanitizer.Sanitize(html); - } - using (var outputms = new MemoryStream()) { using (var outputtw = new StreamWriter(outputms)) @@ -285,16 +272,13 @@ namespace Umbraco.Web public int WordsToLength(string html, int words, bool tagsAsContent) { HtmlDocument doc = new HtmlDocument(); - HtmlSanitizer sanitizer = new HtmlSanitizer(); - doc.LoadHtml(html); int wordCount = 0, length = 0, - insideTagCounter = 0, maxWords = words; - string strippedOfTags = string.Empty; + bool invalidHtml = doc.ParseErrors.Any(); //If tagsAsContent is on, use the string stripped of html tags if (tagsAsContent == false) @@ -305,15 +289,6 @@ namespace Umbraco.Web } html = strippedOfTags; } - else - { - //Check for invalid HTML - if (doc.ParseErrors.Any()) - { - //Sanitize invalid HTML, it will not be pretty, but it will be valid - html = sanitizer.Sanitize(html); - } - } while (length < html.Length) { @@ -323,14 +298,11 @@ namespace Umbraco.Web while (length < html.Length && char.IsWhiteSpace(html[length]) == false) { //Check if we have a space inside a tag and increase the length if we do - //We ignore the end tag as it is added by the Truncate method if (html[length].Equals('<') && html[length + 1].Equals('/') == false && tagsAsContent) { - insideTagCounter = length; - while (html[insideTagCounter].Equals('>') == false) + while (html[length].Equals('>') == false && invalidHtml == false) { length++; - 
insideTagCounter++; } } length++; diff --git a/src/Umbraco.Web/Umbraco.Web.csproj b/src/Umbraco.Web/Umbraco.Web.csproj index f942a35adf..9aaab32af9 100644 --- a/src/Umbraco.Web/Umbraco.Web.csproj +++ b/src/Umbraco.Web/Umbraco.Web.csproj @@ -97,9 +97,6 @@ {07fbc26b-2927-4a22-8d96-d644c667fecc} UmbracoExamine - - ..\packages\AngleSharp.0.9.9\lib\net45\AngleSharp.dll - ..\packages\AutoMapper.3.3.1\lib\net40\AutoMapper.dll True @@ -127,9 +124,6 @@ ..\packages\HtmlAgilityPack.1.4.9.5\lib\Net45\HtmlAgilityPack.dll True - - ..\packages\HtmlSanitizer.3.4.156\lib\net45\HtmlSanitizer.dll - ..\packages\SharpZipLib.0.86.0\lib\20\ICSharpCode.SharpZipLib.dll True diff --git a/src/Umbraco.Web/packages.config b/src/Umbraco.Web/packages.config index ce25d7c26c..daef6ee7fd 100644 --- a/src/Umbraco.Web/packages.config +++ b/src/Umbraco.Web/packages.config @@ -1,12 +1,10 @@  - -