start integrating the local temp storage option for examine

This commit is contained in:
Shannon
2014-11-13 13:09:29 +11:00
parent d43b673092
commit 0688550143
7 changed files with 502 additions and 3 deletions

View File

@@ -0,0 +1,93 @@
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading.Tasks;
using Lucene.Net.Store;
namespace UmbracoExamine.LocalStorage
{
/// <summary>
/// A Lucene directory that pairs a local temp storage folder with the "real" index directory.
/// File listings and file metadata (exists, modified, length, touch) are answered by the real directory,
/// writes and deletes are applied to both directories, and reads are served from the temp storage folder.
/// </summary>
public class LocalTempStorageDirectory : SimpleFSDirectory
{
    private readonly Lucene.Net.Store.Directory _realDirectory;

    /// <summary>
    /// Creates the directory wrapper.
    /// </summary>
    /// <param name="tempStorageDir">The local folder that files are read from and mirrored to.</param>
    /// <param name="realDirectory">The main directory that every write and delete is also applied to.</param>
    /// <exception cref="T:System.ArgumentNullException">The real directory is null.</exception>
    public LocalTempStorageDirectory(
        DirectoryInfo tempStorageDir,
        Lucene.Net.Store.Directory realDirectory)
        : base(tempStorageDir)
    {
        //fail early with a clear message instead of a null reference on the first directory call
        if (realDirectory == null)
        {
            throw new System.ArgumentNullException("realDirectory");
        }

        _realDirectory = realDirectory;
    }

    /// <summary>Returns the names of all files, as reported by the real directory.</summary>
    public override string[] ListAll()
    {
        //always from the real dir
        return _realDirectory.ListAll();
    }

    /// <summary>Returns true if a file with the given name exists. </summary>
    public override bool FileExists(string name)
    {
        //always from the real dir
        return _realDirectory.FileExists(name);
    }

    /// <summary>Returns the time the named file was last modified. </summary>
    public override long FileModified(string name)
    {
        //always from the real dir
        return _realDirectory.FileModified(name);
    }

    /// <summary>Set the modified time of an existing file to now. </summary>
    public override void TouchFile(string name)
    {
        //always from the real dir
        _realDirectory.TouchFile(name);
    }

    /// <summary>Removes an existing file in the directory. </summary>
    public override void DeleteFile(string name)
    {
        //perform on both dirs, the real dir first
        _realDirectory.DeleteFile(name);
        base.DeleteFile(name);
    }

    /// <summary>Returns the length of a file in the directory. </summary>
    public override long FileLength(string name)
    {
        //always from the real dir
        return _realDirectory.FileLength(name);
    }

    /// <summary>
    /// Creates a new, empty file in the directory with the given name.
    /// Returns a stream writing this file.
    /// </summary>
    public override IndexOutput CreateOutput(string name)
    {
        //write to both indexes: the temp storage output is created first, then the real one
        var tempOutput = base.CreateOutput(name);
        IndexOutput realOutput;
        try
        {
            realOutput = _realDirectory.CreateOutput(name);
        }
        catch
        {
            //the real output could not be created, so close the temp output that is already open
            // rather than leaving its file handle behind, then let the original error surface
            tempOutput.Close();
            throw;
        }

        return new MultiIndexOutput(tempOutput, realOutput);
    }

    /// <summary>
    /// Returns a stream reading an existing file.
    /// </summary>
    public override IndexInput OpenInput(string name)
    {
        //return the reader from the cache, not the real dir
        return base.OpenInput(name);
    }

    /// <summary>Disposes both the temp storage directory and the real directory.</summary>
    public override void Dispose()
    {
        try
        {
            base.Dispose();
        }
        finally
        {
            //the real directory must be disposed even when disposing the temp storage directory fails
            _realDirectory.Dispose();
        }
    }
}
}

View File

@@ -0,0 +1,153 @@
using System.Collections.Specialized;
using System.IO;
using System.Linq;
using System.Web;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Umbraco.Core;
using Umbraco.Core.IO;
using Umbraco.Core.Logging;
using Directory = System.IO.Directory;
namespace UmbracoExamine.LocalStorage
{
/// <summary>
/// Prepares the Lucene directory used by an indexer when the index is stored in local temp storage
/// (a folder underneath the ASP.NET codegen directory). When syncing is enabled the files of the main index
/// are first copied into the temp folder and a <see cref="LocalTempStorageDirectory"/> is exposed; otherwise
/// a plain file system directory on the temp folder is exposed.
/// </summary>
internal class LocalTempStorageIndexer
{
    private static readonly object Locker = new object();
    private string _tempPath;
    private bool _syncStorage = false;

    /// <summary>
    /// The directory to index into. Stays null when <see cref="Initialize"/> has not run or when syncing
    /// from the main storage failed.
    /// </summary>
    public Lucene.Net.Store.Directory LuceneDirectory { get; private set; }

    /// <summary>The snapshot deletion policy handed to the index writers that work with this storage.</summary>
    public SnapshotDeletionPolicy Snapshotter { get; private set; }

    public LocalTempStorageIndexer()
    {
        IndexDeletionPolicy policy = new KeepOnlyLastCommitDeletionPolicy();
        Snapshotter = new SnapshotDeletionPolicy(policy);
    }

    /// <summary>
    /// Works out the temp storage folder, reads the optional "syncTempStorage" setting and sets up
    /// <see cref="LuceneDirectory"/>.
    /// </summary>
    /// <param name="config">The provider configuration, may be null.</param>
    /// <param name="configuredPath">The configured (virtual) index path, it is re-rooted under the codegen directory.</param>
    /// <param name="baseLuceneDirectory">The directory of the main index.</param>
    /// <param name="analyzer">The analyzer used to open a writer on the main index while syncing.</param>
    public void Initialize(NameValueCollection config, string configuredPath, Lucene.Net.Store.Directory baseLuceneDirectory, Analyzer analyzer)
    {
        var codegenPath = HttpRuntime.CodegenDir;

        _tempPath = Path.Combine(codegenPath, configuredPath.TrimStart('~', '/').Replace("/", "\\"));

        if (config != null && config["syncTempStorage"] != null)
        {
            var attempt = config["syncTempStorage"].TryConvertTo<bool>();
            //only take the value over when the conversion attempt is usable
            if (attempt)
            {
                _syncStorage = attempt.Result;
            }
        }

        InitializeLocalIndexAndDirectory(baseLuceneDirectory, analyzer, configuredPath);
    }

    /// <summary>
    /// Ensures the temp folder exists and assigns <see cref="LuceneDirectory"/>. All instances share one lock
    /// while doing so.
    /// </summary>
    private void InitializeLocalIndexAndDirectory(Lucene.Net.Store.Directory baseLuceneDirectory, Analyzer analyzer, string configuredPath)
    {
        lock (Locker)
        {
            if (Directory.Exists(_tempPath) == false)
            {
                Directory.CreateDirectory(_tempPath);
            }

            if (_syncStorage == false)
            {
                //just return a normal lucene directory that uses the codegen folder
                LuceneDirectory = FSDirectory.Open(new DirectoryInfo(_tempPath));
                return;
            }

            //if we are syncing storage to the main file system to temp files, then sync from the main FS to our temp FS
            if (SyncFromMainStorage(baseLuceneDirectory, analyzer, configuredPath) == false)
            {
                //quit here and do not assign the lucene directory, this means that the app will now just be working from normal storage
                return;
            }

            //create the custom lucene directory which will keep the main and temp FS's in sync
            LuceneDirectory = new LocalTempStorageDirectory(
                new DirectoryInfo(_tempPath),
                baseLuceneDirectory);
        }
    }

    /// <summary>
    /// Copies the files of a snapshot of the main index into the temp folder, after removing temp files that
    /// are not part of that snapshot.
    /// </summary>
    /// <returns>True when the temp folder was synced; false when the temp index is locked or a file could not be deleted/copied.</returns>
    private bool SyncFromMainStorage(Lucene.Net.Store.Directory baseLuceneDirectory, Analyzer analyzer, string configuredPath)
    {
        //the writer is opened on the underlying/default directory, not the temp codegen dir, with the
        // snapshot policy as its deletion policy
        using (new IndexWriter(
            baseLuceneDirectory,
            analyzer,
            Snapshotter,
            IndexWriter.MaxFieldLength.UNLIMITED))
        {
            var basePath = IOHelper.MapPath(configuredPath);

            //Take the snapshot before entering the try block so that Release() below only ever runs for a
            // snapshot that really exists. NOTE(review): Lucene's SnapshotDeletionPolicy is expected to raise
            // its own error when released without a snapshot, which would hide the original failure - confirm
            // against the Lucene.Net version in use.
            var commit = Snapshotter.Snapshot();
            try
            {
                var allSnapshotFiles = commit.GetFileNames().Concat(new[] { commit.GetSegmentsFileName() }).ToArray();

                var tempDir = new DirectoryInfo(_tempPath);

                //Get all files in the temp storage that don't exist in the snapshot collection, we want to remove these
                var toRemove = tempDir.GetFiles()
                    .Select(x => x.Name)
                    .Except(allSnapshotFiles);

                using (var tempDirectory = new SimpleFSDirectory(tempDir))
                {
                    if (IndexWriter.IsLocked(tempDirectory))
                    {
                        LogHelper.Warn<LocalTempStorageIndexer>("Cannot sync index files from main storage, the index is currently locked");
                        return false;
                    }

                    foreach (var file in toRemove)
                    {
                        try
                        {
                            File.Delete(Path.Combine(_tempPath, file));
                        }
                        catch (IOException ex)
                        {
                            LogHelper.Error<LocalTempStorageIndexer>("Could not delete index file, could not sync from main storage", ex);
                            return false;
                        }
                    }
                }

                foreach (var fileName in allSnapshotFiles.Where(f => f.IsNullOrWhiteSpace() == false))
                {
                    try
                    {
                        //the main index files are read from the "Index" sub folder of the configured path
                        File.Copy(
                            Path.Combine(basePath, "Index", fileName),
                            Path.Combine(_tempPath, Path.GetFileName(fileName)), true);
                    }
                    catch (IOException ex)
                    {
                        LogHelper.Error<LocalTempStorageIndexer>("Could not copy index file, could not sync from main storage", ex);
                        return false;
                    }
                }

                return true;
            }
            finally
            {
                Snapshotter.Release();
            }
        }
    }
}
}

View File

@@ -0,0 +1,72 @@
using System;
using System.Linq;
using Lucene.Net.Store;
namespace UmbracoExamine.LocalStorage
{
/// <summary>
/// An index output that forwards every write, flush, seek and close to a set of underlying outputs so that
/// they all receive the same data. Position and length are reported from the first output.
/// </summary>
public class MultiIndexOutput : IndexOutput
{
    private readonly IndexOutput[] _outputs;

    /// <summary>
    /// Creates an output that writes to all of the given outputs.
    /// </summary>
    /// <param name="outputs">The outputs to write to, at least one is required.</param>
    /// <exception cref="T:System.ArgumentNullException">The outputs array is null.</exception>
    /// <exception cref="T:System.InvalidOperationException">No outputs were specified.</exception>
    public MultiIndexOutput(params IndexOutput[] outputs)
    {
        if (outputs == null)
        {
            throw new ArgumentNullException("outputs");
        }
        if (outputs.Length < 1)
        {
            throw new InvalidOperationException("There must be at least one output specified");
        }
        _outputs = outputs;
    }

    /// <summary>Writes a single byte to every output.</summary>
    public override void WriteByte(byte b)
    {
        foreach (var output in _outputs)
        {
            output.WriteByte(b);
        }
    }

    /// <summary>Writes the given range of bytes to every output.</summary>
    public override void WriteBytes(byte[] b, int offset, int length)
    {
        foreach (var output in _outputs)
        {
            output.WriteBytes(b, offset, length);
        }
    }

    /// <summary>Flushes every output.</summary>
    public override void Flush()
    {
        foreach (var output in _outputs)
        {
            output.Flush();
        }
    }

    /// <summary>
    /// Closes every output. A failure while closing one output does not prevent the remaining outputs from
    /// being closed; the error still propagates to the caller afterwards.
    /// </summary>
    public override void Close()
    {
        CloseFrom(0);
    }

    /// <summary>Closes the output at the given position and then, regardless of the outcome, all that follow it.</summary>
    private void CloseFrom(int index)
    {
        if (index >= _outputs.Length)
        {
            return;
        }

        try
        {
            _outputs[index].Close();
        }
        finally
        {
            //keep going so that no output is left open because an earlier one failed to close
            CloseFrom(index + 1);
        }
    }

    /// <summary>Returns the current write position, as reported by the first output.</summary>
    public override long GetFilePointer()
    {
        //return the first
        return _outputs[0].GetFilePointer();
    }

    /// <summary>Moves every output to the given position.</summary>
    public override void Seek(long pos)
    {
        foreach (var output in _outputs)
        {
            output.Seek(pos);
        }
    }

    /// <summary>Returns the length of the file, as reported by the first output.</summary>
    public override long Length()
    {
        //return the first - this must be its length and not its file pointer, the two differ
        // once the output has been positioned somewhere other than the end of the file
        return _outputs[0].Length();
    }
}
}

View File

@@ -12,6 +12,7 @@ using Examine;
using Examine.Config;
using Examine.Providers;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Umbraco.Core;
using umbraco.cms.businesslogic;
using UmbracoExamine.DataServices;
@@ -21,6 +22,7 @@ using UmbracoExamine.Config;
using Examine.LuceneEngine.Providers;
using Lucene.Net.Analysis;
using umbraco.BasePages;
using UmbracoExamine.LocalStorage;
namespace UmbracoExamine
@@ -65,6 +67,8 @@ namespace UmbracoExamine
#region Constants & Fields
private readonly LocalTempStorageIndexer _localTempStorageHelper = new LocalTempStorageIndexer();
/// <summary>
/// Used to store the path of a content object
/// </summary>
@@ -147,6 +151,22 @@ namespace UmbracoExamine
base.Initialize(name, config);
if (config != null && config["useTempStorage"] != null)
{
//Use the temp storage directory which will store the index in the local/codegen folder, this is useful
// for websites that are running from a remote file server and file IO latency becomes an issue
var attemptUseTempStorage = config["useTempStorage"].TryConvertTo<bool>();
if (attemptUseTempStorage)
{
var indexSet = IndexSets.Instance.Sets[IndexSetName];
var configuredPath = indexSet.IndexPath;
_localTempStorageHelper.Initialize(config, configuredPath, base.GetLuceneDirectory(), IndexingAnalyzer);
}
}
}
#endregion
@@ -220,6 +240,33 @@ namespace UmbracoExamine
#region Public methods
/// <summary>
/// Returns the local temp storage directory when one has been set up, otherwise the default directory.
/// </summary>
public override Lucene.Net.Store.Directory GetLuceneDirectory()
{
    //nothing assigned by the temp storage helper means we are working from the default storage
    if (_localTempStorageHelper.LuceneDirectory == null)
    {
        return base.GetLuceneDirectory();
    }

    return _localTempStorageHelper.LuceneDirectory;
}
/// <summary>
/// Returns a writer on the local temp storage directory when one has been set up, otherwise the default writer.
/// </summary>
public override IndexWriter GetIndexWriter()
{
    //no temp local storage has been set up, use the default writer
    if (_localTempStorageHelper.LuceneDirectory == null)
    {
        return base.GetIndexWriter();
    }

    //create the writer with the snapshotter, though that won't make too much a difference because we are not
    // keeping the writer open unless using nrt which we are not currently.
    return new IndexWriter(
        GetLuceneDirectory(),
        IndexingAnalyzer,
        _localTempStorageHelper.Snapshotter,
        IndexWriter.MaxFieldLength.UNLIMITED);
}
/// <summary>
/// Overridden for logging

View File

@@ -121,6 +121,9 @@
<Compile Include="DataServices\UmbracoMediaService.cs" />
<Compile Include="IndexTypes.cs" />
<Compile Include="LegacyLibrary.cs" />
<Compile Include="LocalStorage\LocalTempStorageDirectory.cs" />
<Compile Include="LocalStorage\LocalTempStorageIndexer.cs" />
<Compile Include="LocalStorage\MultiIndexOutput.cs" />
<Compile Include="LoggingLevel.cs" />
<Compile Include="StaticField.cs" />
<Compile Include="UmbracoMemberIndexer.cs" />

View File

@@ -2,15 +2,19 @@
using System.IO;
using System.Linq;
using System.Security;
using System.Web;
using Examine;
using Examine.LuceneEngine.Config;
using Examine.Providers;
using Examine.SearchCriteria;
using Lucene.Net.Store;
using Umbraco.Core;
using UmbracoExamine.Config;
using Examine.LuceneEngine;
using Examine.LuceneEngine.Providers;
using Examine.LuceneEngine.SearchCriteria;
using Lucene.Net.Analysis;
using UmbracoExamine.LocalStorage;
namespace UmbracoExamine
@@ -21,6 +25,11 @@ namespace UmbracoExamine
public class UmbracoExamineSearcher : LuceneSearcher
{
private volatile Lucene.Net.Store.Directory _localTempDirectory;
private static readonly object Locker = new object();
private string _localTempPath = null;
private bool _syncTempStorage = false;
#region Constructors
/// <summary>
@@ -54,12 +63,35 @@ namespace UmbracoExamine
_name = name;
//We need to check if we actually can initialize, if not then don't continue
if (!CanInitialize())
if (CanInitialize() == false)
{
return;
}
base.Initialize(name, config);
if (config != null && config["useTempStorage"] != null)
{
//Use the temp storage directory which will store the index in the local/codegen folder, this is useful
// for websites that are running from a remote file server and file IO latency becomes an issue
var attemptUseTempStorage = config["useTempStorage"].TryConvertTo<bool>();
if (attemptUseTempStorage)
{
var indexSet = IndexSets.Instance.Sets[IndexSetName];
var configuredPath = indexSet.IndexPath;
var codegenPath = HttpRuntime.CodegenDir;
_localTempPath = Path.Combine(codegenPath, configuredPath.TrimStart('~', '/').Replace("/", "\\"));
}
if (config["syncTempStorage"] != null)
{
var attemptSync = config["syncTempStorage"].TryConvertTo<bool>();
if (attemptSync)
{
_syncTempStorage = attemptSync.Result;
}
}
}
}
/// <summary>
@@ -136,6 +168,36 @@ namespace UmbracoExamine
.Where(x => x != UmbracoContentIndexer.IndexPathFieldName)
.Where(x => x != UmbracoContentIndexer.NodeTypeAliasFieldName)
.ToArray();
}
}
/// <summary>
/// Returns the directory to search: the default directory when no local temp path is set, otherwise a
/// directory on the local temp path that is created on first use and then reused.
/// </summary>
protected override Lucene.Net.Store.Directory GetLuceneDirectory()
{
    //local temp storage is not enabled, just return the default
    if (_localTempPath == null)
    {
        return base.GetLuceneDirectory();
    }

    //local temp storage is enabled and the directory instance has already been configured
    if (_localTempDirectory != null)
    {
        return _localTempDirectory;
    }

    lock (Locker)
    {
        //check again now that we hold the lock, another thread may have created it in the meantime
        if (_localTempDirectory == null)
        {
            var tempFolder = new DirectoryInfo(_localTempPath);
            Lucene.Net.Store.Directory created;
            if (_syncTempStorage)
            {
                //syncing: wrap the temp folder together with the default directory
                created = new LocalTempStorageDirectory(tempFolder, base.GetLuceneDirectory());
            }
            else
            {
                //not syncing just use a normal lucene directory
                created = FSDirectory.Open(tempFolder);
            }
            _localTempDirectory = created;
        }
    }

    return _localTempDirectory;
}
}
}

View File

@@ -1,9 +1,12 @@
using System.Collections;
using System.Collections.Specialized;
using System.Linq;
using System.Security;
using System.Xml.Linq;
using System.Xml.XPath;
using Examine.LuceneEngine.Config;
using Lucene.Net.Index;
using Umbraco.Core;
using UmbracoExamine.Config;
using umbraco.cms.businesslogic.member;
using Examine.LuceneEngine;
@@ -12,6 +15,7 @@ using Examine;
using System.IO;
using UmbracoExamine.DataServices;
using Lucene.Net.Analysis;
using UmbracoExamine.LocalStorage;
namespace UmbracoExamine
{
@@ -21,6 +25,8 @@ namespace UmbracoExamine
public class UmbracoMemberIndexer : UmbracoContentIndexer
{
private readonly LocalTempStorageIndexer _localTempStorageHelper = new LocalTempStorageIndexer();
/// <summary>
/// Default constructor
/// </summary>
@@ -38,7 +44,70 @@ namespace UmbracoExamine
public UmbracoMemberIndexer(
    IIndexCriteria indexerData,
    DirectoryInfo indexPath,
    IDataService dataService,
    Analyzer analyzer,
    bool async)
    : base(indexerData, indexPath, dataService, analyzer, async)
{
    //nothing to set up here, every argument is handed straight to the base constructor
}
/// <summary>
/// <summary>
/// Set up all properties for the indexer based on configuration information specified. This will ensure that
/// all of the folders required by the indexer are created and exist. This will also create an instruction
/// file declaring the computer name that is part taking in the indexing. This file will then be used to
/// determine the master indexer machine in a load balanced environment (if one exists).
/// </summary>
/// <param name="name">The friendly name of the provider.</param>
/// <param name="config">A collection of the name/value pairs representing the provider-specific attributes specified in the configuration for this provider.</param>
/// <exception cref="T:System.ArgumentNullException">
/// The name of the provider is null.
/// </exception>
/// <exception cref="T:System.ArgumentException">
/// The name of the provider has a length of zero.
/// </exception>
/// <exception cref="T:System.InvalidOperationException">
/// An attempt is made to call <see cref="M:System.Configuration.Provider.ProviderBase.Initialize(System.String,System.Collections.Specialized.NameValueCollection)"/> on a provider after the provider has already been initialized.
/// </exception>
public override void Initialize(string name, NameValueCollection config)
{
    base.Initialize(name, config);

    if (config == null || config["useTempStorage"] == null)
    {
        //temp storage has not been configured, keep working from the default storage
        return;
    }

    //Use the temp storage directory which will store the index in the local/codegen folder, this is useful
    // for websites that are running from a remote file server and file IO latency becomes an issue
    var attemptUseTempStorage = config["useTempStorage"].TryConvertTo<bool>();

    //The attempt on its own only says whether the setting could be converted, the converted flag is its Result.
    // Both are checked so that useTempStorage="false" leaves temp storage switched off.
    if (attemptUseTempStorage && attemptUseTempStorage.Result)
    {
        var indexSet = IndexSets.Instance.Sets[IndexSetName];
        var configuredPath = indexSet.IndexPath;

        //NOTE(review): the base indexer may already perform this same temp storage setup in its own
        // Initialize with its own helper - confirm that it is not intended to happen only once.
        _localTempStorageHelper.Initialize(config, configuredPath, base.GetLuceneDirectory(), IndexingAnalyzer);
    }
}
/// <summary>
/// Returns the local temp storage directory when one has been set up, otherwise the default directory.
/// </summary>
public override Lucene.Net.Store.Directory GetLuceneDirectory()
{
    //nothing assigned by the temp storage helper means we are working from the default storage
    if (_localTempStorageHelper.LuceneDirectory == null)
    {
        return base.GetLuceneDirectory();
    }

    return _localTempStorageHelper.LuceneDirectory;
}
/// <summary>
/// Returns a writer on the local temp storage directory when one has been set up, otherwise the default writer.
/// </summary>
public override IndexWriter GetIndexWriter()
{
    //no temp local storage has been set up, use the default writer
    if (_localTempStorageHelper.LuceneDirectory == null)
    {
        return base.GetIndexWriter();
    }

    //create the writer with the snapshotter, though that won't make too much a difference because we are not
    // keeping the writer open unless using nrt which we are not currently.
    return new IndexWriter(
        GetLuceneDirectory(),
        IndexingAnalyzer,
        _localTempStorageHelper.Snapshotter,
        IndexWriter.MaxFieldLength.UNLIMITED);
}
/// <summary>
/// Ensures that the'_searchEmail' is added to the user fields so that it is indexed - without having to modify the config
/// </summary>
/// <param name="indexSet"></param>