diff --git a/build/Build.bat b/build/Build.bat index 98b8adad2f..1b715dcc15 100644 --- a/build/Build.bat +++ b/build/Build.bat @@ -56,7 +56,6 @@ REN .\_BuildOutput\WebApp\Xslt\Web.config Web.config.transform ECHO Packing the NuGet release files ..\src\.nuget\NuGet.exe Pack NuSpecs\UmbracoCms.Core.nuspec -Version %version% ..\src\.nuget\NuGet.exe Pack NuSpecs\UmbracoCms.nuspec -Version %version% -..\src\.nuget\NuGet.exe Pack NuSpecs\UmbracoExamine.PDF.nuspec IF ERRORLEVEL 1 GOTO :showerror diff --git a/build/Build.proj b/build/Build.proj index af7be44c7c..5be460d09b 100644 --- a/build/Build.proj +++ b/build/Build.proj @@ -75,7 +75,6 @@ UmbracoCms$(DECIMAL_BUILD_NUMBER).zip UmbracoCms.AllBinaries$(DECIMAL_BUILD_NUMBER).zip UmbracoCms.WebPI$(DECIMAL_BUILD_NUMBER).zip - UmbracoExamine.PDF.0.7.0.zip False ..\..\build\$(BuildFolder) $(MSBuildProjectDirectory)\$(BuildFolder) @@ -89,7 +88,6 @@ $(BuildFolderAbsolutePath)WebApp\ $(BuildFolderRelativeToProjects)WebPi\ $(BuildFolderAbsolutePath)WebPi\ - $(BuildFolderAbsolutePath)UmbracoExamine.PDF\ @@ -150,7 +148,7 @@ - + @@ -158,15 +156,7 @@ - - - - - - - - - + @@ -264,7 +254,7 @@ - + @@ -276,35 +266,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - $(BUILD_RELEASE) diff --git a/build/NuSpecs/UmbracoCms.Core.AllBinaries.nuspec b/build/NuSpecs/UmbracoCms.Core.AllBinaries.nuspec index 35839623e0..e8fae204ce 100644 --- a/build/NuSpecs/UmbracoCms.Core.AllBinaries.nuspec +++ b/build/NuSpecs/UmbracoCms.Core.AllBinaries.nuspec @@ -6,7 +6,7 @@ Umbraco Cms Core All Binaries Morten Christensen Umbraco HQ - http://umbraco.codeplex.com/license + http://opensource.org/licenses/MIT http://umbraco.com/ http://umbraco.com/media/357769/100px_transparent.png false diff --git a/build/NuSpecs/UmbracoCms.Core.Symbols.nuspec b/build/NuSpecs/UmbracoCms.Core.Symbols.nuspec index 29f7365017..d17cf39884 100644 --- a/build/NuSpecs/UmbracoCms.Core.Symbols.nuspec +++ b/build/NuSpecs/UmbracoCms.Core.Symbols.nuspec @@ -6,7 +6,7 @@ Umbraco Cms Core Binaries Umbraco HQ Umbraco HQ - http://umbraco.codeplex.com/license + http://opensource.org/licenses/MIT http://umbraco.com/ http://umbraco.com/media/357769/100px_transparent.png false diff --git a/build/NuSpecs/UmbracoCms.Core.nuspec b/build/NuSpecs/UmbracoCms.Core.nuspec index 33c8a20953..a86f172c6b 100644 --- a/build/NuSpecs/UmbracoCms.Core.nuspec +++ b/build/NuSpecs/UmbracoCms.Core.nuspec @@ -6,7 +6,7 @@ Umbraco Cms Core Binaries Umbraco HQ Umbraco HQ - http://umbraco.codeplex.com/license + http://opensource.org/licenses/MIT http://umbraco.com/ http://umbraco.com/media/357769/100px_transparent.png false diff --git a/build/NuSpecs/UmbracoCms.nuspec b/build/NuSpecs/UmbracoCms.nuspec index 1c035a4ffc..d047af2d29 100644 --- a/build/NuSpecs/UmbracoCms.nuspec +++ b/build/NuSpecs/UmbracoCms.nuspec @@ -6,7 +6,7 @@ Umbraco Cms Umbraco HQ Umbraco HQ - http://umbraco.codeplex.com/license + http://opensource.org/licenses/MIT http://umbraco.com/ http://umbraco.com/media/357769/100px_transparent.png false diff --git a/build/NuSpecs/UmbracoExamine.PDF.nuspec b/build/NuSpecs/UmbracoExamine.PDF.nuspec index ca4acdcc36..5d1afff2b5 100644 --- a/build/NuSpecs/UmbracoExamine.PDF.nuspec +++ b/build/NuSpecs/UmbracoExamine.PDF.nuspec @@ -5,7 +5,7 @@ 0.7.0 Umbraco HQ Umbraco HQ - http://umbraco.codeplex.com/license + http://opensource.org/licenses/MIT http://umbraco.com/ http://umbraco.com/media/357769/100px_transparent.png false diff --git a/src/UmbracoExamine.PDF/PDFIndexer.cs b/src/UmbracoExamine.PDF/PDFIndexer.cs deleted file mode 100644 index ef07b3929f..0000000000 --- a/src/UmbracoExamine.PDF/PDFIndexer.cs +++ /dev/null @@ -1,291 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Collections.Specialized; -using System.IO; -using System.Linq; -using System.Security; -using System.Text.RegularExpressions; -using System.Xml.Linq; -using Examine; -using iTextSharp.text.exceptions; -using iTextSharp.text.pdf; -using System.Text; -using Lucene.Net.Analysis; -using UmbracoExamine.DataServices; -using iTextSharp.text.pdf.parser; - - -namespace UmbracoExamine.PDF -{ - /// - /// An Umbraco Lucene.Net indexer which will index the text content of a file - /// - public class PDFIndexer : BaseUmbracoIndexer - { - #region Constructors - - /// - /// Default constructor - /// - public PDFIndexer() - { - SupportedExtensions = new[] { ".pdf" }; - UmbracoFileProperty = "umbracoFile"; - } - - /// - /// Constructor to allow for creating an indexer at runtime - /// - /// - /// - /// - /// - [SecuritySafeCritical] - public PDFIndexer(DirectoryInfo indexPath, IDataService dataService, Analyzer analyzer, bool async) - : base( - new IndexCriteria(Enumerable.Empty(), Enumerable.Empty(), Enumerable.Empty(), Enumerable.Empty(), null), - indexPath, dataService, analyzer, async) - { - SupportedExtensions = new[] { ".pdf" }; - UmbracoFileProperty = "umbracoFile"; - } - - /// - /// Constructor to allow for creating an indexer at runtime - /// - /// - /// - /// - /// - [SecuritySafeCritical] - public PDFIndexer(Lucene.Net.Store.Directory luceneDirectory, IDataService dataService, Analyzer analyzer, bool async) - : base( - new IndexCriteria(Enumerable.Empty(), Enumerable.Empty(), Enumerable.Empty(), Enumerable.Empty(), null), - luceneDirectory, dataService, analyzer, async) - { - SupportedExtensions = new[] { ".pdf" }; - UmbracoFileProperty = "umbracoFile"; - } - - #endregion - - - #region Properties - /// - /// Gets or sets the supported extensions for files, currently the system will only - /// process PDF files. - /// - /// The supported extensions. - public IEnumerable SupportedExtensions { get; set; } - - /// - /// Gets or sets the umbraco property alias (defaults to umbracoFile) - /// - /// The umbraco file property. - public string UmbracoFileProperty { get; set; } - - /// - /// Gets the name of the Lucene.Net field which the content is inserted into - /// - /// The name of the text content field. - public const string TextContentFieldName = "FileTextContent"; - - protected override IEnumerable SupportedTypes - { - get - { - return new string[] { IndexTypes.Media }; - } - } - - #endregion - - /// - /// Set up all properties for the indexer based on configuration information specified. This will ensure that - /// all of the folders required by the indexer are created and exist. - /// - /// - /// - [SecuritySafeCritical] - public override void Initialize(string name, NameValueCollection config) - { - base.Initialize(name, config); - - if (!string.IsNullOrEmpty(config["extensions"])) - SupportedExtensions = config["extensions"].Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries); - - //checks if a custom field alias is specified - if (!string.IsNullOrEmpty(config["umbracoFileProperty"])) - UmbracoFileProperty = config["umbracoFileProperty"]; - } - - /// - /// Provides the means to extract the text to be indexed from the file specified - /// - /// - /// - protected virtual string ExtractTextFromFile(FileInfo file) - { - if (!SupportedExtensions.Select(x => x.ToUpper()).Contains(file.Extension.ToUpper())) - { - throw new NotSupportedException("The file with the extension specified is not supported"); - } - - var pdf = new PDFParser(); - - Action onError = (e) => OnIndexingError(new IndexingErrorEventArgs("Could not read PDF", -1, e)); - - var txt = pdf.GetTextFromAllPages(file.FullName, onError); - return txt; - - } - - /// - /// Collects all of the data that needs to be indexed as defined in the index set. - /// - /// Media item XML being indexed - /// Type of index (should only ever be media) - /// Fields containing the data for the index - protected override Dictionary GetDataToIndex(XElement node, string type) - { - var fields = base.GetDataToIndex(node, type); - - //find the field which contains the file - var filePath = node.Elements().FirstOrDefault(x => - { - if (x.Attribute("alias") != null) - return (string)x.Attribute("alias") == this.UmbracoFileProperty; - else - return x.Name == this.UmbracoFileProperty; - }); - //make sure the file exists - if (filePath != default(XElement) && !string.IsNullOrEmpty((string)filePath)) - { - //get the file path from the data service - var fullPath = this.DataService.MapPath((string)filePath); - var fi = new FileInfo(fullPath); - if (fi.Exists) - { - try - { - fields.Add(TextContentFieldName, ExtractTextFromFile(fi)); - } - catch (NotSupportedException) - { - //log that we couldn't index the file found - DataService.LogService.AddErrorLog((int)node.Attribute("id"), "UmbracoExamine.FileIndexer: Extension '" + fi.Extension + "' is not supported at this time"); - } - } - else - { - DataService.LogService.AddInfoLog((int)node.Attribute("id"), "UmbracoExamine.FileIndexer: No file found at path " + filePath); - } - } - - return fields; - } - - #region Internal PDFParser Class - - /// - /// Parses a PDF file and extracts the text from it. - /// - internal class PDFParser - { - - static PDFParser() - { - lock (Locker) - { - UnsupportedRange = new HashSet(); - foreach (var c in Enumerable.Range(0x0000, 0x001F)) - { - UnsupportedRange.Add((char) c); - } - UnsupportedRange.Add((char)0x1F); - - //replace line breaks with space - ReplaceWithSpace = new HashSet {'\r', '\n'}; - } - } - - private static readonly object Locker = new object(); - - /// - /// Stores the unsupported range of character - /// - /// - /// used as a reference: - /// http://www.tamasoft.co.jp/en/general-info/unicode.html - /// http://en.wikipedia.org/wiki/Summary_of_Unicode_character_assignments - /// http://en.wikipedia.org/wiki/Unicode - /// http://en.wikipedia.org/wiki/Basic_Multilingual_Plane - /// - private static HashSet UnsupportedRange; - - private static HashSet ReplaceWithSpace; - - [SecuritySafeCritical] - public string GetTextFromAllPages(string pdfPath, Action onError) - { - var output = new StringWriter(); - - try - { - using (var reader = new PdfReader(pdfPath)) - { - for (int i = 1; i <= reader.NumberOfPages; i++) - { - var result = - ExceptChars( - PdfTextExtractor.GetTextFromPage(reader, i, new SimpleTextExtractionStrategy()), - UnsupportedRange, - ReplaceWithSpace); - output.Write(result); - } - } - - } - catch (Exception ex) - { - onError(ex); - } - - return output.ToString(); - } - - - } - - /// - /// remove all toExclude chars from string - /// - /// - /// - /// - /// - private static string ExceptChars(string str, HashSet toExclude, HashSet replaceWithSpace) - { - var sb = new StringBuilder(str.Length); - for (var i = 0; i < str.Length; i++) - { - var c = str[i]; - if (toExclude.Contains(c) == false) - { - if (replaceWithSpace.Contains(c)) - { - sb.Append(" "); - } - else - { - sb.Append(c); - } - } - - } - return sb.ToString(); - } - - #endregion - } -} diff --git a/src/UmbracoExamine.PDF/Properties/AssemblyInfo.cs b/src/UmbracoExamine.PDF/Properties/AssemblyInfo.cs deleted file mode 100644 index ddb6fab57b..0000000000 --- a/src/UmbracoExamine.PDF/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,27 +0,0 @@ -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Security; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyCompany("umbraco")] -[assembly: AssemblyCopyright("Copyright © Umbraco 2012")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] -[assembly: AssemblyTitle("UmbracoExamine.PDF")] -[assembly: AssemblyDescription("Umbraco index providers for PDF based on the Examine model using Lucene.NET")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyProduct("UmbracoExamine.PDF")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("8933a78d-8414-4c72-a74d-76aa7fb0e9ad")] - -[assembly: AssemblyVersion("0.7.0.*")] -[assembly: AssemblyFileVersion("0.7.0.*")] \ No newline at end of file diff --git a/src/UmbracoExamine.PDF/UmbracoExamine.PDF.csproj b/src/UmbracoExamine.PDF/UmbracoExamine.PDF.csproj deleted file mode 100644 index adc9b175fc..0000000000 --- a/src/UmbracoExamine.PDF/UmbracoExamine.PDF.csproj +++ /dev/null @@ -1,96 +0,0 @@ - - - - Debug - AnyCPU - 8.0.30703 - 2.0 - {F30DDDB8-3994-4673-82AE-057123C6E1A8} - Library - Properties - UmbracoExamine.PDF - UmbracoExamine.PDF - v4.5 - 512 - - - - - - - - - - ..\ - true - - - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - SecurityRules.ruleset - false - - - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - bin\Release\UmbracoExamine.PDF.XML - false - - - - False - ..\packages\Examine.0.1.61.2941\lib\Examine.dll - - - False - ..\packages\SharpZipLib.0.86.0\lib\20\ICSharpCode.SharpZipLib.dll - - - False - ..\packages\iTextSharp.5.5.3\lib\itextsharp.dll - - - False - ..\packages\Lucene.Net.2.9.4.1\lib\net40\Lucene.Net.dll - - - - - - - - - - - - - - - - {07fbc26b-2927-4a22-8d96-d644c667fecc} - UmbracoExamine - - - - - - - - - - \ No newline at end of file diff --git a/src/UmbracoExamine.PDF/app.config b/src/UmbracoExamine.PDF/app.config deleted file mode 100644 index b77bae14a4..0000000000 --- a/src/UmbracoExamine.PDF/app.config +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/src/UmbracoExamine.PDF/packages.config b/src/UmbracoExamine.PDF/packages.config deleted file mode 100644 index 72c8a05f8b..0000000000 --- a/src/UmbracoExamine.PDF/packages.config +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - \ No newline at end of file diff --git a/src/umbraco.sln b/src/umbraco.sln index 78202fd38a..30a2e2dd19 100644 --- a/src/umbraco.sln +++ b/src/umbraco.sln @@ -33,7 +33,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "NuSpecs", "NuSpecs", "{227C ..\build\NuSpecs\UmbracoCms.Core.nuspec = ..\build\NuSpecs\UmbracoCms.Core.nuspec ..\build\NuSpecs\UmbracoCms.Core.Symbols.nuspec = ..\build\NuSpecs\UmbracoCms.Core.Symbols.nuspec ..\build\NuSpecs\UmbracoCms.nuspec = ..\build\NuSpecs\UmbracoCms.nuspec - ..\build\NuSpecs\UmbracoExamine.PDF.nuspec = ..\build\NuSpecs\UmbracoExamine.PDF.nuspec EndProjectSection EndProject Project("{E24C65DC-7377-472B-9ABA-BC803B73C61A}") = "Umbraco.Web.UI.Client", "http://localhost:3961", "{3819A550-DCEC-4153-91B4-8BA9F7F0B9B4}"