From 0688550143bb11ed0ba55f124508aec171ae7ab2 Mon Sep 17 00:00:00 2001
From: Shannon
Date: Thu, 13 Nov 2014 13:09:29 +1100
Subject: [PATCH] start integrating the local templ storage option for examine

---
 .../LocalStorage/LocalTempStorageDirectory.cs | 112 +++++++++++
 .../LocalStorage/LocalTempStorageIndexer.cs   | 169 ++++++++++++++++++
 .../LocalStorage/MultiIndexOutput.cs          |  84 +++++++++
 src/UmbracoExamine/UmbracoContentIndexer.cs   |  53 ++++++
 src/UmbracoExamine/UmbracoExamine.csproj      |   3 +
 src/UmbracoExamine/UmbracoExamineSearcher.cs  |  69 +++++++-
 src/UmbracoExamine/UmbracoMemberIndexer.cs    |  75 +++++++-
 7 files changed, 562 insertions(+), 3 deletions(-)
 create mode 100644 src/UmbracoExamine/LocalStorage/LocalTempStorageDirectory.cs
 create mode 100644 src/UmbracoExamine/LocalStorage/LocalTempStorageIndexer.cs
 create mode 100644 src/UmbracoExamine/LocalStorage/MultiIndexOutput.cs

diff --git a/src/UmbracoExamine/LocalStorage/LocalTempStorageDirectory.cs b/src/UmbracoExamine/LocalStorage/LocalTempStorageDirectory.cs
new file mode 100644
index 0000000000..8c903a850d
--- /dev/null
+++ b/src/UmbracoExamine/LocalStorage/LocalTempStorageDirectory.cs
@@ -0,0 +1,112 @@
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using System.Threading.Tasks;
+using Lucene.Net.Store;
+
+namespace UmbracoExamine.LocalStorage
+{
+    /// <summary>
+    /// A Lucene directory stored in a local temp folder that is kept in step with a second, "real" directory:
+    /// file listings and metadata come from the real directory, writes and deletes are applied to both, and
+    /// reads are served from the local temp copy.
+    /// </summary>
+    public class LocalTempStorageDirectory : SimpleFSDirectory
+    {
+        private readonly Lucene.Net.Store.Directory _realDirectory;
+
+        /// <summary>
+        /// Creates a directory over <paramref name="tempStorageDir"/> that mirrors <paramref name="realDirectory"/>.
+        /// </summary>
+        /// <param name="tempStorageDir">The local folder holding the temp copy of the index files</param>
+        /// <param name="realDirectory">The directory that metadata calls are delegated to and that every write is duplicated into</param>
+        public LocalTempStorageDirectory(
+            DirectoryInfo tempStorageDir,
+            Lucene.Net.Store.Directory realDirectory)
+            : base(tempStorageDir)
+        {
+            _realDirectory = realDirectory;
+        }
+
+        /// <summary>Returns the names of the files in the real directory.</summary>
+        public override string[] ListAll()
+        {
+            //always from the real dir
+            return _realDirectory.ListAll();
+        }
+
+        /// <summary>Returns true if a file with the given name exists.</summary>
+        public override bool FileExists(string name)
+        {
+            //always from the real dir
+            return _realDirectory.FileExists(name);
+        }
+
+        /// <summary>Returns the time the named file was last modified.</summary>
+        public override long FileModified(string name)
+        {
+            //always from the real dir
+            return _realDirectory.FileModified(name);
+        }
+
+        /// <summary>Set the modified time of an existing file to now.</summary>
+        public override void TouchFile(string name)
+        {
+            //always from the real dir
+            _realDirectory.TouchFile(name);
+        }
+
+        /// <summary>Removes an existing file in the directory.</summary>
+        public override void DeleteFile(string name)
+        {
+            //perform on both dirs
+            _realDirectory.DeleteFile(name);
+            base.DeleteFile(name);
+        }
+
+        /// <summary>Returns the length of a file in the directory.</summary>
+        public override long FileLength(string name)
+        {
+            //always from the real dir
+            return _realDirectory.FileLength(name);
+        }
+
+        /// <summary>
+        /// Creates a new, empty file in the directory with the given name.
+        /// Returns a stream writing this file.
+        /// </summary>
+        public override IndexOutput CreateOutput(string name)
+        {
+            //write to both indexes
+
+            return new MultiIndexOutput(
+                base.CreateOutput(name),
+                _realDirectory.CreateOutput(name));
+        }
+
+        /// <summary>
+        /// Returns a stream reading an existing file.
+        /// </summary>
+        public override IndexInput OpenInput(string name)
+        {
+            //return the reader from the cache, not the real dir
+            return base.OpenInput(name);
+        }
+
+        /// <summary>Disposes the local temp directory and then the real directory.</summary>
+        public override void Dispose()
+        {
+            try
+            {
+                base.Dispose();
+            }
+            finally
+            {
+                //release the real dir even when closing the temp dir fails
+                _realDirectory.Dispose();
+            }
+        }
+
+
+    }
+}
diff --git a/src/UmbracoExamine/LocalStorage/LocalTempStorageIndexer.cs b/src/UmbracoExamine/LocalStorage/LocalTempStorageIndexer.cs
new file mode 100644
index 0000000000..0d702c8e12
--- /dev/null
+++ b/src/UmbracoExamine/LocalStorage/LocalTempStorageIndexer.cs
@@ -0,0 +1,169 @@
+using System.Collections.Specialized;
+using System.IO;
+using System.Linq;
+using System.Web;
+using Lucene.Net.Analysis;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Umbraco.Core;
+using Umbraco.Core.IO;
+using Umbraco.Core.Logging;
+using Directory = System.IO.Directory;
+
+namespace UmbracoExamine.LocalStorage
+{
+    /// <summary>
+    /// Prepares a Lucene directory for an index under the ASP.NET codegen folder and, when syncTempStorage is
+    /// enabled, first copies the files of a snapshot of the main index into that folder.
+    /// </summary>
+    internal class LocalTempStorageIndexer
+    {
+        private string _tempPath;
+        public Lucene.Net.Store.Directory LuceneDirectory { get; private set; }
+        private static readonly object Locker = new object();
+        public SnapshotDeletionPolicy Snapshotter { get; private set; }
+        private bool _syncStorage = false;
+
+        public LocalTempStorageIndexer()
+        {
+            IndexDeletionPolicy policy = new KeepOnlyLastCommitDeletionPolicy();
+            Snapshotter = new SnapshotDeletionPolicy(policy);
+        }
+
+        /// <summary>
+        /// Resolves the temp folder for the index, reads the syncTempStorage setting and creates <see cref="LuceneDirectory"/>.
+        /// </summary>
+        /// <param name="config">The provider configuration, may be null</param>
+        /// <param name="configuredPath">The configured (virtual) path of the index set</param>
+        /// <param name="baseLuceneDirectory">The directory of the index in the main storage</param>
+        /// <param name="analyzer">The analyzer used when opening a writer on the main index</param>
+        public void Initialize(NameValueCollection config, string configuredPath, Lucene.Net.Store.Directory baseLuceneDirectory, Analyzer analyzer)
+        {
+            var codegenPath = HttpRuntime.CodegenDir;
+
+            _tempPath = Path.Combine(codegenPath, configuredPath.TrimStart('~', '/').Replace("/", "\\"));
+
+            if (config != null)
+            {
+                if (config["syncTempStorage"] != null)
+                {
+                    var attempt = config["syncTempStorage"].TryConvertTo<bool>();
+                    if (attempt)
+                    {
+                        _syncStorage = attempt.Result;
+                    }
+                }
+            }
+
+            InitializeLocalIndexAndDirectory(baseLuceneDirectory, analyzer, configuredPath);
+        }
+
+        /// <summary>
+        /// Creates the temp folder and assigns <see cref="LuceneDirectory"/>. When syncing is enabled and the files cannot be
+        /// synced (IO failure or the temp index is locked), <see cref="LuceneDirectory"/> is left unassigned.
+        /// </summary>
+        private void InitializeLocalIndexAndDirectory(Lucene.Net.Store.Directory baseLuceneDirectory, Analyzer analyzer, string configuredPath)
+        {
+            lock (Locker)
+            {
+                if (Directory.Exists(_tempPath) == false)
+                {
+                    Directory.CreateDirectory(_tempPath);
+                }
+
+                //if we are syncing storage to the main file system to temp files, then sync from the main FS to our temp FS
+                if (_syncStorage)
+                {
+                    //copy index
+
+                    using (new IndexWriter(
+                        //read from the underlying/default directory, not the temp codegen dir
+                        baseLuceneDirectory,
+                        analyzer,
+                        Snapshotter,
+                        IndexWriter.MaxFieldLength.UNLIMITED))
+                    {
+                        try
+                        {
+                            var basePath = IOHelper.MapPath(configuredPath);
+
+                            var commit = Snapshotter.Snapshot();
+                            //the segments file may already be part of the commit's file names, only list (and copy) each file once
+                            var allSnapshotFiles = commit.GetFileNames().Concat(new[] {commit.GetSegmentsFileName()}).Distinct().ToArray();
+
+                            var tempDir = new DirectoryInfo(_tempPath);
+
+                            //Get all files in the temp storage that don't exist in the snapshot collection, we want to remove these
+                            var toRemove = tempDir.GetFiles()
+                                .Select(x => x.Name)
+                                .Except(allSnapshotFiles);
+
+                            using (var tempDirectory = new SimpleFSDirectory(tempDir))
+                            {
+                                if (IndexWriter.IsLocked(tempDirectory) == false)
+                                {
+                                    foreach (var file in toRemove)
+                                    {
+                                        try
+                                        {
+                                            File.Delete(Path.Combine(_tempPath, file));
+                                        }
+                                        catch (IOException ex)
+                                        {
+                                            LogHelper.Error<LocalTempStorageIndexer>("Could not delete index file, could not sync from main storage", ex);
+
+                                            //quit here and do not assign the lucene directory, this means that the app will now just be working from normal storage
+                                            return;
+                                        }
+                                    }
+                                }
+                                else
+                                {
+                                    LogHelper.Warn<LocalTempStorageIndexer>("Cannot sync index files from main storage, the index is currently locked");
+
+                                    //quit here and do not assign the lucene directory, this means that the app will now just be working from normal storage
+                                    return;
+                                }
+                            }
+
+                            foreach (var fileName in allSnapshotFiles.Where(f => f.IsNullOrWhiteSpace() == false))
+                            {
+                                try
+                                {
+                                    File.Copy(
+                                        Path.Combine(basePath, "Index", fileName),
+                                        Path.Combine(_tempPath, Path.GetFileName(fileName)), true);
+                                }
+                                catch (IOException ex)
+                                {
+                                    LogHelper.Error<LocalTempStorageIndexer>("Could not copy index file, could not sync from main storage", ex);
+
+                                    //quit here and do not assign the lucene directory, this means that the app will now just be working from normal storage
+                                    return;
+                                }
+                            }
+
+                        }
+                        finally
+                        {
+                            Snapshotter.Release();
+                        }
+                    }
+
+                    //create the custom lucene directory which will keep the main and temp FS's in sync
+
+                    LuceneDirectory = new LocalTempStorageDirectory(
+                        new DirectoryInfo(_tempPath),
+                        baseLuceneDirectory);
+                }
+                else
+                {
+                    //just return a normal lucene directory that uses the codegen folder
+
+                    LuceneDirectory = FSDirectory.Open(new DirectoryInfo(_tempPath));
+                }
+
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/UmbracoExamine/LocalStorage/MultiIndexOutput.cs b/src/UmbracoExamine/LocalStorage/MultiIndexOutput.cs
new file mode 100644
index 0000000000..fed13643b8
--- /dev/null
+++ b/src/UmbracoExamine/LocalStorage/MultiIndexOutput.cs
@@ -0,0 +1,84 @@
+using System;
+using System.Linq;
+using Lucene.Net.Store;
+
+namespace UmbracoExamine.LocalStorage
+{
+    /// <summary>
+    /// An index output that repeats every write, flush, seek and close on each of the outputs it wraps.
+    /// The file pointer and the length are reported from the first output.
+    /// </summary>
+    public class MultiIndexOutput : IndexOutput
+    {
+        private readonly IndexOutput[] _outputs;
+
+        /// <summary>
+        /// Creates an output that fans out to <paramref name="outputs"/>.
+        /// </summary>
+        /// <param name="outputs">The outputs to write to, at least one is required</param>
+        public MultiIndexOutput(params IndexOutput[] outputs)
+        {
+            if (outputs == null)
+            {
+                throw new ArgumentNullException("outputs");
+            }
+            if (outputs.Length < 1)
+            {
+                throw new InvalidOperationException("There must be at least one output specified");
+            }
+            _outputs = outputs;
+        }
+
+        public override void WriteByte(byte b)
+        {
+            foreach (var output in _outputs)
+            {
+                output.WriteByte(b);
+            }
+        }
+
+        public override void WriteBytes(byte[] b, int offset, int length)
+        {
+            foreach (var output in _outputs)
+            {
+                output.WriteBytes(b, offset, length);
+            }
+        }
+
+        public override void Flush()
+        {
+            foreach (var output in _outputs)
+            {
+                output.Flush();
+            }
+        }
+
+        public override void Close()
+        {
+            foreach (var output in _outputs)
+            {
+                output.Close();
+            }
+        }
+
+        public override long GetFilePointer()
+        {
+            //return the first
+            return _outputs.First().GetFilePointer();
+        }
+
+        public override void Seek(long pos)
+        {
+            foreach (var output in _outputs)
+            {
+                output.Seek(pos);
+            }
+        }
+
+        public override long Length()
+        {
+            //return the first, this has to be its length and not its file pointer because the two differ after a Seek
+            return _outputs.First().Length();
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/UmbracoExamine/UmbracoContentIndexer.cs b/src/UmbracoExamine/UmbracoContentIndexer.cs
index 3f442f5daa..921784cdd0 100644
--- a/src/UmbracoExamine/UmbracoContentIndexer.cs
+++ b/src/UmbracoExamine/UmbracoContentIndexer.cs
@@ -12,6 +12,7 @@ using Examine;
 using Examine.Config;
 using Examine.Providers;
 using Lucene.Net.Documents;
+using Lucene.Net.Index;
 using Umbraco.Core;
 using umbraco.cms.businesslogic;
 using UmbracoExamine.DataServices;
@@ -21,6 +22,7 @@ using UmbracoExamine.Config;
 using Examine.LuceneEngine.Providers;
 using Lucene.Net.Analysis;
 using umbraco.BasePages;
+using UmbracoExamine.LocalStorage;
 namespace UmbracoExamine
@@ -65,6 +67,8 @@ namespace UmbracoExamine
         #region Constants & Fields
+        private readonly LocalTempStorageIndexer _localTempStorageHelper = new LocalTempStorageIndexer();
+
         /// <summary>
         /// Used to store the path of a content object
         /// </summary>
@@ -147,6 +151,22 @@ namespace UmbracoExamine
             base.Initialize(name, config);
+
+            if (config != null && config["useTempStorage"] != null)
+            {
+                //Use the temp storage directory which will store the index in the local/codegen folder, this is useful
+                // for websites that are running from a remote file server and file IO latency becomes an issue
+                var attemptUseTempStorage = config["useTempStorage"].TryConvertTo<bool>();
+                if (attemptUseTempStorage.Success && attemptUseTempStorage.Result)
+                {
+                    var indexSet = IndexSets.Instance.Sets[IndexSetName];
+                    var configuredPath = indexSet.IndexPath;
+
+                    _localTempStorageHelper.Initialize(config, configuredPath, base.GetLuceneDirectory(), IndexingAnalyzer);
+                }
+            }
+
+
         }
         #endregion
@@ -220,6 +240,39 @@ namespace UmbracoExamine
         #region Public methods
+        /// <summary>
+        /// Returns the local temp storage directory when one has been initialized, otherwise the default directory.
+        /// </summary>
+        public override Lucene.Net.Store.Directory GetLuceneDirectory()
+        {
+            //if temp local storage is configured use that, otherwise return the default
+            if (_localTempStorageHelper.LuceneDirectory != null)
+            {
+                return _localTempStorageHelper.LuceneDirectory;
+            }
+
+            return base.GetLuceneDirectory();
+
+        }
+
+        /// <summary>
+        /// Returns a writer over the local temp storage directory when one has been initialized, otherwise the default writer.
+        /// </summary>
+        public override IndexWriter GetIndexWriter()
+        {
+            //if temp local storage is configured use that, otherwise return the default
+            if (_localTempStorageHelper.LuceneDirectory != null)
+            {
+                return new IndexWriter(GetLuceneDirectory(), IndexingAnalyzer,
+                    //create the writer with the snapshotter, though that won't make too much a difference because we are not keeping the writer open unless using nrt
+                    // which we are not currently.
+                    _localTempStorageHelper.Snapshotter,
+                    IndexWriter.MaxFieldLength.UNLIMITED);
+            }
+
+            return base.GetIndexWriter();
+        }
+
         /// <summary>
         /// Overridden for logging
diff --git a/src/UmbracoExamine/UmbracoExamine.csproj b/src/UmbracoExamine/UmbracoExamine.csproj
index e64821b098..49ee6c8ff6 100644
--- a/src/UmbracoExamine/UmbracoExamine.csproj
+++ b/src/UmbracoExamine/UmbracoExamine.csproj
@@ -121,6 +121,9 @@
+    <Compile Include="LocalStorage\LocalTempStorageDirectory.cs" />
+    <Compile Include="LocalStorage\LocalTempStorageIndexer.cs" />
+    <Compile Include="LocalStorage\MultiIndexOutput.cs" />
diff --git a/src/UmbracoExamine/UmbracoExamineSearcher.cs b/src/UmbracoExamine/UmbracoExamineSearcher.cs
index 7406e98f8e..d295e09480 100644
--- a/src/UmbracoExamine/UmbracoExamineSearcher.cs
+++ b/src/UmbracoExamine/UmbracoExamineSearcher.cs
@@ -2,15 +2,19 @@
 using System.IO;
 using System.Linq;
 using System.Security;
+using System.Web;
 using Examine;
+using Examine.LuceneEngine.Config;
 using Examine.Providers;
 using Examine.SearchCriteria;
+using Lucene.Net.Store;
 using Umbraco.Core;
 using UmbracoExamine.Config;
 using Examine.LuceneEngine;
 using Examine.LuceneEngine.Providers;
 using Examine.LuceneEngine.SearchCriteria;
 using Lucene.Net.Analysis;
+using UmbracoExamine.LocalStorage;
 namespace UmbracoExamine
@@ -21,6 +25,11 @@ namespace UmbracoExamine
     public class UmbracoExamineSearcher : LuceneSearcher
     {
+        private volatile Lucene.Net.Store.Directory _localTempDirectory;
+        private static readonly object Locker = new object();
+        private string _localTempPath = null;
+        private bool _syncTempStorage = false;
+
         #region Constructors
         /// <summary>
@@ -54,12 +63,35 @@ namespace UmbracoExamine
             _name = name;
             //We need to check if we actually can initialize, if not then don't continue
-            if (!CanInitialize())
+            if (CanInitialize() == false)
             {
                 return;
             }
             base.Initialize(name, config);
+
+            if (config != null && config["useTempStorage"] != null)
+            {
+                //Use the temp storage directory which will store the index in the local/codegen folder, this is useful
+                // for websites that are running from a remote file server and file IO latency becomes an issue
+                var attemptUseTempStorage = config["useTempStorage"].TryConvertTo<bool>();
+                if (attemptUseTempStorage.Success && attemptUseTempStorage.Result)
+                {
+                    var indexSet = IndexSets.Instance.Sets[IndexSetName];
+                    var configuredPath = indexSet.IndexPath;
+                    var codegenPath = HttpRuntime.CodegenDir;
+                    _localTempPath = Path.Combine(codegenPath, configuredPath.TrimStart('~', '/').Replace("/", "\\"));
+                }
+
+                if (config["syncTempStorage"] != null)
+                {
+                    var attemptSync = config["syncTempStorage"].TryConvertTo<bool>();
+                    if (attemptSync)
+                    {
+                        _syncTempStorage = attemptSync.Result;
+                    }
+                }
+            }
         }
         /// <summary>
@@ -136,6 +168,39 @@ namespace UmbracoExamine
                 .Where(x => x != UmbracoContentIndexer.IndexPathFieldName)
                 .Where(x => x != UmbracoContentIndexer.NodeTypeAliasFieldName)
                 .ToArray();
-        }
+        }
+
+        /// <summary>
+        /// Returns the default directory, or the lazily created local temp storage directory when temp storage is enabled.
+        /// </summary>
+        protected override Lucene.Net.Store.Directory GetLuceneDirectory()
+        {
+            //local temp storage is not enabled, just return the default
+            if (_localTempPath == null) return base.GetLuceneDirectory();
+
+            //local temp storage is enabled, configure the local directory instance
+            if (_localTempDirectory == null)
+            {
+                lock (Locker)
+                {
+                    if (_localTempDirectory == null)
+                    {
+                        if (_syncTempStorage)
+                        {
+                            _localTempDirectory = new LocalTempStorageDirectory(
+                                new DirectoryInfo(_localTempPath),
+                                base.GetLuceneDirectory());
+                        }
+                        else
+                        {
+                            //not syncing just use a normal lucene directory
+                            _localTempDirectory = FSDirectory.Open(new DirectoryInfo(_localTempPath));
+                        }
+                    }
+                }
+            }
+
+            return _localTempDirectory;
+        }
     }
 }
diff --git a/src/UmbracoExamine/UmbracoMemberIndexer.cs b/src/UmbracoExamine/UmbracoMemberIndexer.cs
index b13ac76c23..2ac64cb712 100644
--- a/src/UmbracoExamine/UmbracoMemberIndexer.cs
+++ b/src/UmbracoExamine/UmbracoMemberIndexer.cs
@@ -1,9 +1,12 @@
 using System.Collections;
+using System.Collections.Specialized;
 using System.Linq;
 using System.Security;
 using System.Xml.Linq;
 using System.Xml.XPath;
 using Examine.LuceneEngine.Config;
+using Lucene.Net.Index;
+using Umbraco.Core;
 using UmbracoExamine.Config;
 using umbraco.cms.businesslogic.member;
 using Examine.LuceneEngine;
@@ -12,6 +15,7 @@ using Examine;
 using System.IO;
 using UmbracoExamine.DataServices;
 using Lucene.Net.Analysis;
+using UmbracoExamine.LocalStorage;
 namespace UmbracoExamine
 {
@@ -21,6 +25,8 @@ namespace UmbracoExamine
     public class UmbracoMemberIndexer : UmbracoContentIndexer
     {
+        private readonly LocalTempStorageIndexer _localTempStorageHelper = new LocalTempStorageIndexer();
+
         /// <summary>
         /// Default constructor
         /// </summary>
@@ -38,7 +44,74 @@ namespace UmbracoExamine
         public UmbracoMemberIndexer(IIndexCriteria indexerData, DirectoryInfo indexPath, IDataService dataService, Analyzer analyzer, bool async)
             : base(indexerData, indexPath, dataService, analyzer, async) { }
-        /// <summary>
+        /// <summary>
+        /// Set up all properties for the indexer based on configuration information specified. This will ensure that
+        /// all of the folders required by the indexer are created and exist. This will also create an instruction
+        /// file declaring the computer name that is taking part in the indexing. This file will then be used to
+        /// determine the master indexer machine in a load balanced environment (if one exists).
+        /// </summary>
+        /// <param name="name">The friendly name of the provider.</param>
+        /// <param name="config">A collection of the name/value pairs representing the provider-specific attributes specified in the configuration for this provider.</param>
+        /// <exception cref="T:System.ArgumentNullException">
+        /// The name of the provider is null.
+        /// </exception>
+        /// <exception cref="T:System.ArgumentException">
+        /// The name of the provider has a length of zero.
+        /// </exception>
+        /// <exception cref="T:System.InvalidOperationException">
+        /// An attempt is made to call <see cref="M:System.Configuration.Provider.ProviderBase.Initialize(System.String,System.Collections.Specialized.NameValueCollection)"/> on a provider after the provider has already been initialized.
+        /// </exception>
+        public override void Initialize(string name, NameValueCollection config)
+        {
+            base.Initialize(name, config);
+
+            //NOTE(review): the UmbracoContentIndexer base class performs this same temp storage setup in its own Initialize
+            // and base.GetLuceneDirectory() below resolves to its override - confirm a second helper is needed here
+            if (config != null && config["useTempStorage"] != null)
+            {
+                //Use the temp storage directory which will store the index in the local/codegen folder, this is useful
+                // for websites that are running from a remote file server and file IO latency becomes an issue
+                var attemptUseTempStorage = config["useTempStorage"].TryConvertTo<bool>();
+                if (attemptUseTempStorage.Success && attemptUseTempStorage.Result)
+                {
+                    var indexSet = IndexSets.Instance.Sets[IndexSetName];
+                    var configuredPath = indexSet.IndexPath;
+
+                    _localTempStorageHelper.Initialize(config, configuredPath, base.GetLuceneDirectory(), IndexingAnalyzer);
+                }
+            }
+        }
+
+        /// <summary>Returns the local temp storage directory when one has been initialized, otherwise the base directory.</summary>
+        public override Lucene.Net.Store.Directory GetLuceneDirectory()
+        {
+            //if temp local storage is configured use that, otherwise return the default
+            if (_localTempStorageHelper.LuceneDirectory != null)
+            {
+                return _localTempStorageHelper.LuceneDirectory;
+            }
+
+            return base.GetLuceneDirectory();
+
+        }
+
+        /// <summary>Returns a writer over the local temp storage directory when one has been initialized, otherwise the base writer.</summary>
+        public override IndexWriter GetIndexWriter()
+        {
+            //if temp local storage is configured use that, otherwise return the default
+            if (_localTempStorageHelper.LuceneDirectory != null)
+            {
+                return new IndexWriter(GetLuceneDirectory(), IndexingAnalyzer,
+                    //create the writer with the snapshotter, though that won't make too much a difference because we are not keeping the writer open unless using nrt
+                    // which we are not currently.
+                    _localTempStorageHelper.Snapshotter,
+                    IndexWriter.MaxFieldLength.UNLIMITED);
+            }
+
+            return base.GetIndexWriter();
+        }
+
+        /// <summary>
         /// Ensures that the'_searchEmail' is added to the user fields so that it is indexed - without having to modify the config
         /// </summary>
         ///