Files
Umbraco-CMS/src/UmbracoExamine/BaseUmbracoIndexer.cs

520 lines
20 KiB
C#

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Linq;
using System.Net;
using System.Security;
using System.Text;
using System.Threading;
using System.Web;
using Examine.LuceneEngine.Config;
using Examine.LuceneEngine.Providers;
using Examine.Providers;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Umbraco.Core;
using umbraco.BasePages;
using umbraco.BusinessLogic;
using UmbracoExamine.DataServices;
using Examine;
using System.IO;
using System.Xml.Linq;
using Lucene.Net.Store;
using UmbracoExamine.LocalStorage;
namespace UmbracoExamine
{
/// <summary>
/// An abstract provider containing the basic functionality to be able to query against
/// Umbraco data.
/// </summary>
public abstract class BaseUmbracoIndexer : LuceneIndexer
{
// note
// wrapping all operations that end up calling base.SafelyProcessQueueItems in a safe call
// context because they will fork a thread/task/whatever which should *not* capture our
// call context (and the database it can contain)! ideally we should be able to override
// SafelyProcessQueueItems but that's not possible in the current version of Examine.
#region Constructors
/// <summary>
/// Parameterless constructor; leaves <see cref="DataService"/> unset so that
/// Initialize can resolve it from the provider configuration.
/// </summary>
protected BaseUmbracoIndexer()
{
}

/// <summary>
/// Constructor to allow for creating an indexer at runtime against an index folder
/// </summary>
/// <param name="indexerData">The index criteria, handed to the base indexer</param>
/// <param name="indexPath">The index folder, handed to the base indexer</param>
/// <param name="dataService">The data service this indexer will use</param>
/// <param name="analyzer">The analyzer, handed to the base indexer</param>
/// <param name="async">The async flag, handed to the base indexer</param>
protected BaseUmbracoIndexer(IIndexCriteria indexerData, DirectoryInfo indexPath, IDataService dataService, Analyzer analyzer, bool async)
    : base(indexerData, indexPath, analyzer, async)
{
    this.DataService = dataService;
}

/// <summary>
/// Constructor to allow for creating an indexer at runtime against a Lucene directory
/// </summary>
/// <param name="indexerData">The index criteria, handed to the base indexer</param>
/// <param name="luceneDirectory">The Lucene directory, handed to the base indexer</param>
/// <param name="dataService">The data service this indexer will use</param>
/// <param name="analyzer">The analyzer, handed to the base indexer</param>
/// <param name="async">The async flag, handed to the base indexer</param>
protected BaseUmbracoIndexer(IIndexCriteria indexerData, Lucene.Net.Store.Directory luceneDirectory, IDataService dataService, Analyzer analyzer, bool async)
    : base(indexerData, luceneDirectory, analyzer, async)
{
    this.DataService = dataService;
}

/// <summary>
/// Creates an NRT indexer
/// </summary>
/// <param name="indexerData">The index criteria, handed to the base indexer</param>
/// <param name="writer">The index writer, handed to the base indexer</param>
/// <param name="dataService">The data service this indexer will use</param>
/// <param name="async">The async flag, handed to the base indexer</param>
protected BaseUmbracoIndexer(IIndexCriteria indexerData, IndexWriter writer, IDataService dataService, bool async)
    : base(indexerData, writer, async)
{
    this.DataService = dataService;
}
#endregion
/// <summary>
/// Used for unit tests: when set to true, CanInitialize skips the application state checks
/// and always reports that the indexer can initialize.
/// </summary>
internal static bool? DisableInitializationCheck;

// holds the legacy local temp storage state; its LuceneDirectory stays null unless temp storage gets configured
private readonly LocalTempStorageIndexer _localTempStorageIndexer = new LocalTempStorageIndexer();

// lazily created by InternalSearcher, only when temp storage is in use
private BaseLuceneSearcher _internalTempStorageSearcher;
#region Properties
/// <summary>
/// True when the local temp storage indexer holds a Lucene directory.
/// </summary>
public bool UseTempStorage
{
    get
    {
        var tempDirectory = _localTempStorageIndexer.LuceneDirectory;
        return tempDirectory != null;
    }
}

/// <summary>
/// The temp storage path, or an empty string when temp storage is not in use.
/// </summary>
public string TempStorageLocation
{
    get { return UseTempStorage ? _localTempStorageIndexer.TempPath : string.Empty; }
}

/// <summary>
/// If true, the IndexingActionHandler will be run to keep the default index up to date.
/// </summary>
public bool EnableDefaultEventHandler { get; protected set; }

/// <summary>
/// Determines if the manager will call the indexing methods when content is saved or deleted as
/// opposed to cache being updated.
/// </summary>
public bool SupportUnpublishedContent { get; protected internal set; }

/// <summary>
/// The data service used for retrieving and submitting data to the cms
/// </summary>
public IDataService DataService { get; protected internal set; }

/// <summary>
/// The index types this indexer supports
/// </summary>
protected abstract IEnumerable<string> SupportedTypes { get; }
#endregion
#region Initialize
/// <summary>
/// Sets up the indexer from the provider settings: resolves the data service, configures its
/// logging, reads the default event handler flag, initializes the base provider and finally
/// sets up the legacy local temp storage when it has been requested.
/// </summary>
/// <param name="name">The provider name; also assigned to the log service's ProviderName</param>
/// <param name="config">
/// The provider settings. Keys read by this method: dataService, logLevel,
/// enableDefaultEventHandler, directoryFactory and useTempStorage. The collection is also
/// handed to the base Initialize and to the local temp storage indexer.
/// </param>
public override void Initialize(string name, System.Collections.Specialized.NameValueCollection config)
{
    var dataServiceTypeName = config["dataService"];
    if (string.IsNullOrEmpty(dataServiceTypeName) == false)
    {
        //this should be a fully qualified type
        // NOTE(review): Type.GetType returns null when the type cannot be resolved, in which case
        // Activator.CreateInstance throws an ArgumentNullException - left as is so callers see the
        // same exception as before.
        var serviceType = Type.GetType(dataServiceTypeName);
        DataService = (IDataService)Activator.CreateInstance(serviceType);
    }
    else if (DataService == null)
    {
        //By default, we will be using the UmbracoDataService
        //generally this would only need to be set differently for unit testing
        DataService = CreateDefaultUmbracoDataService();
    }

    //Normal is the log level unless a valid one is configured
    DataService.LogService.LogLevel = LoggingLevel.Normal;

    //TryParse instead of Parse: Parse throws an OverflowException (not an ArgumentException) for
    // numeric values outside the range of the enum's underlying type, which would abort the
    // initialization. TryParse returns false for null, empty and any unparsable value.
    LoggingLevel logLevel;
    if (Enum.TryParse(config["logLevel"], out logLevel))
    {
        DataService.LogService.LogLevel = logLevel;
    }

    DataService.LogService.ProviderName = name;

    EnableDefaultEventHandler = true; //set to true by default
    bool enabled;
    if (bool.TryParse(config["enableDefaultEventHandler"], out enabled))
    {
        EnableDefaultEventHandler = enabled;
    }

    DataService.LogService.AddVerboseLog(-1, string.Format("{0} indexer initializing", name));

    base.Initialize(name, config);

    //NOTES: useTempStorage is obsolete, tempStorageDirectory is obsolete, both have been superseded by Examine Core's IDirectoryFactory
    // tempStorageDirectory never actually got finished in Umbraco Core but accidentally got shipped (it's only enabled on the searcher
    // and not the indexer). So this whole block is just legacy

    //detect if a dir factory has been specified, if so then useTempStorage will not be used (deprecated)
    if (config["directoryFactory"] == null && config["useTempStorage"] != null)
    {
        //temp storage only applies when the base directory is a file system directory
        var fsDir = base.GetLuceneDirectory() as FSDirectory;
        if (fsDir != null)
        {
            //Use the temp storage directory which will store the index in the local/codegen folder, this is useful
            // for websites that are running from a remote file server and file IO latency becomes an issue
            var attemptUseTempStorage = config["useTempStorage"].TryConvertTo<LocalStorageType>();
            if (attemptUseTempStorage)
            {
                var indexSet = IndexSets.Instance.Sets[IndexSetName];
                var configuredPath = indexSet.IndexPath;
                _localTempStorageIndexer.Initialize(config, configuredPath, fsDir, IndexingAnalyzer, attemptUseTempStorage.Result);
            }
        }
    }
}
#endregion
/// <summary>
/// Creates the data service that Initialize falls back to when none has been configured
/// or assigned. Virtual so that subclasses can supply a different default.
/// </summary>
/// <returns>A new <see cref="UmbracoDataService"/></returns>
protected virtual IDataService CreateDefaultUmbracoDataService()
{
    IDataService defaultService = new UmbracoDataService();
    return defaultService;
}
/// <summary>
/// Lock object guarding the lazy creation of the temp storage searcher
/// </summary>
private readonly object _internalSearcherLocker = new object();

/// <summary>
/// The internal searcher: when temp local storage is configured this is a searcher created
/// (once) over this indexer's writer, otherwise the base implementation's searcher.
/// </summary>
protected override BaseSearchProvider InternalSearcher
{
    get
    {
        //no temp local storage, return the default
        if (UseTempStorage == false)
        {
            return base.InternalSearcher;
        }

        //create the searcher on first access; the null check is repeated inside the lock
        // so that only one searcher is created when several threads get here together
        if (_internalTempStorageSearcher == null)
        {
            lock (_internalSearcherLocker)
            {
                if (_internalTempStorageSearcher == null)
                {
                    _internalTempStorageSearcher = new LuceneSearcher(GetIndexWriter(), IndexingAnalyzer);
                }
            }
        }

        return _internalTempStorageSearcher;
    }
}
/// <summary>
/// Returns the Lucene directory this indexer works against.
/// </summary>
/// <returns>
/// The local temp storage directory when temp storage is configured, otherwise the base directory
/// </returns>
public override Lucene.Net.Store.Directory GetLuceneDirectory()
{
    if (UseTempStorage == false)
    {
        return base.GetLuceneDirectory();
    }

    return _localTempStorageIndexer.LuceneDirectory;
}
/// <summary>
/// Creates the index writer.
/// </summary>
/// <returns>
/// When temp local storage is configured, a writer over the temp storage directory using the
/// deletion policy tracked for that directory and an unlimited max field length; otherwise the
/// writer created by the base implementation.
/// </returns>
protected override IndexWriter CreateIndexWriter()
{
    //no temp local storage, return the default
    if (UseTempStorage == false)
    {
        return base.CreateIndexWriter();
    }

    //resolve the directory once so the writer and its deletion policy refer to the same instance
    var directory = GetLuceneDirectory();
    return new IndexWriter(directory, IndexingAnalyzer,
        DeletePolicyTracker.Current.GetPolicy(directory),
        IndexWriter.MaxFieldLength.UNLIMITED);
}
///// <summary>
///// Override to check if we can actually initialize.
///// </summary>
///// <returns></returns>
///// <remarks>
///// This check is required since the base examine lib will try to check this method on app startup. If the app
///// is not ready then we need to deal with it otherwise the base class will throw exceptions since we've bypassed initialization.
///// </remarks>
//public override bool IndexExists()
//{
// return base.IndexExists();
//}
/// <summary>
/// Rebuilds the index, but only when the application is in a state where indexes can be initialized.
/// </summary>
/// <remarks>
/// The check is required since the base examine lib will try to rebuild on startup. The base call
/// runs inside a <see cref="SafeCallContext"/>.
/// </remarks>
public override void RebuildIndex()
{
    if (CanInitialize() == false)
    {
        return;
    }

    // remove the db from lcc
    //using (ApplicationContext.Current.DatabaseContext.UseSafeDatabase())
    using (new SafeCallContext())
    {
        base.RebuildIndex();
    } // will try to re-instate the original DB *but* if a DB has been created in the meantime what shall we do?
}
/// <summary>
/// Indexes everything of the given type, but only when the application is in a state where
/// indexes can be initialized.
/// </summary>
/// <param name="type">The index type, handed to the base implementation</param>
/// <remarks>
/// The base call runs inside a <see cref="SafeCallContext"/>.
/// </remarks>
public override void IndexAll(string type)
{
    if (CanInitialize() == false)
    {
        return;
    }

    using (new SafeCallContext())
    {
        base.IndexAll(type);
    }
}
/// <summary>
/// Re-indexes a single node. Nothing happens when the application is not in a state where indexes
/// can be initialized, or when the type is not one of the <see cref="SupportedTypes"/>.
/// </summary>
/// <param name="node">The node, handed to the base implementation</param>
/// <param name="type">The index type of the node</param>
/// <remarks>
/// The base call runs inside a <see cref="SafeCallContext"/>.
/// </remarks>
public override void ReIndexNode(XElement node, string type)
{
    //the initialization check comes first, the type check only runs when it passes
    if (CanInitialize() == false || SupportedTypes.Contains(type) == false)
    {
        return;
    }

    using (new SafeCallContext())
    {
        base.ReIndexNode(node, type);
    }
}
/// <summary>
/// Deletes a node from the index, but only when the application is in a state where indexes
/// can be initialized.
/// </summary>
/// <param name="nodeId">The id of the node, handed to the base implementation</param>
/// <remarks>
/// The base call runs inside a <see cref="SafeCallContext"/>.
/// </remarks>
public override void DeleteFromIndex(string nodeId)
{
    if (CanInitialize() == false)
    {
        return;
    }

    using (new SafeCallContext())
    {
        base.DeleteFromIndex(nodeId);
    }
}
#region Protected
/// <summary>
/// Returns true if the Umbraco application is in a state that we can initialize the examine indexes
/// </summary>
/// <returns>
/// True when <see cref="DisableInitializationCheck"/> is set to true; otherwise true only when there
/// is a current application context that is configured and whose database is configured.
/// </returns>
protected bool CanInitialize()
{
    //the check has been explicitly switched off (unit tests)
    if (DisableInitializationCheck.HasValue && DisableInitializationCheck.Value)
    {
        return true;
    }

    //We need to check if we actually can initialize, if not then don't continue
    var appContext = ApplicationContext.Current;
    return appContext != null
        && appContext.IsConfigured
        && appContext.DatabaseContext.IsDatabaseConfigured;
}
/// <summary>
/// Ensures that the node being indexed is a descendant of the configured parent node (when one is
/// configured), then defers to the base validation.
/// </summary>
/// <param name="node">The node to validate; its "path" attribute is inspected</param>
/// <returns>
/// False when a parent node id greater than zero is configured and the node's path does not
/// contain it, or the node has no path attribute at all; otherwise the result of the base validation.
/// </returns>
protected override bool ValidateDocument(XElement node)
{
    //check if this document is a descendant of the parent
    var parentNodeId = IndexerData.ParentNodeId;
    if (parentNodeId.HasValue && parentNodeId.Value > 0)
    {
        //the conversion yields null when the attribute is missing; such a node cannot be
        // shown to be a descendant so it is rejected instead of causing a NullReferenceException
        var path = (string)node.Attribute("path");
        if (path == null || path.Contains("," + parentNodeId.Value.ToString() + ",") == false)
        {
            return false;
        }
    }

    return base.ValidateDocument(node);
}
/// <summary>
/// Rebuilds by running <see cref="IndexAll"/> for each of the <see cref="SupportedTypes"/>, in order.
/// </summary>
protected override void PerformIndexRebuild()
{
    foreach (var supportedType in SupportedTypes)
    {
        IndexAll(supportedType);
    }
}
/// <summary>
/// Builds an xpath statement to query against Umbraco data for the index type specified, then
/// initiates the re-indexing of the data matched.
/// </summary>
/// <param name="type">The index type; nothing happens when it is not one of the supported types</param>
/// <remarks>
/// The statement is passed through the nodes indexing event first: handlers may cancel the
/// operation or replace the statement.
/// </remarks>
protected override void PerformIndexAll(string type)
{
    //NOTE: the logic below is NOT used, this method is overridden
    // and we query directly against the umbraco service layer.
    // This is here for backwards compat only.
    if (!SupportedTypes.Contains(type))
    {
        return;
    }

    //this can be used across both schemas
    const string nodeTypeAliasTemplate = "(@nodeTypeAlias='{0}' or (count(@nodeTypeAlias)=0 and name()='{0}'))";

    //the optional filters, these get and-ed together
    var filters = new List<string>();

    //match node type aliases if specified
    var aliasClauses = IndexerData.IncludeNodeTypes
        .Select(alias => string.Format(nodeTypeAliasTemplate, alias))
        .ToArray();
    if (aliasClauses.Length > 0)
    {
        filters.Add("(" + string.Join(" or ", aliasClauses) + ")");
    }

    //match all children of the configured parent node
    if (IndexerData.ParentNodeId.HasValue && IndexerData.ParentNodeId.Value > 0)
    {
        //if the path contains comma - id - comma then the nodes must be a child
        filters.Add("(contains(@path, '," + IndexerData.ParentNodeId.Value + ",'))");
    }

    //create the full xpath statement to match the appropriate nodes. If there is a filter
    //then apply it, otherwise just select all nodes.
    var filter = string.Join(" and ", filters);
    var xPath = string.Format(
        "//*[(number(@id) > 0 and (@isDoc or @nodeTypeAlias)){0}]",
        filter.Length > 0 ? " and " + filter : "");

    //raise the event and set the xpath statement to the value returned
    var args = new IndexingNodesEventArgs(IndexerData, xPath, type);
    OnNodesIndexing(args);
    if (args.Cancel)
    {
        return;
    }
    xPath = args.XPath;

    DataService.LogService.AddVerboseLog(-1, string.Format("({0}) PerformIndexAll with XPATH: {1}", this.Name, xPath));

    AddNodesToIndex(xPath, type);
}
/// <summary>
/// Queries the data service with the xpath statement for the index type specified.
/// </summary>
/// <param name="xPath">The xpath statement handed to the data service</param>
/// <param name="type">The index type; content and media are handled</param>
/// <returns>
/// For content: the latest content when <see cref="SupportUnpublishedContent"/> is true, otherwise
/// the published content. For media: the latest media. Null for any other type.
/// </returns>
[Obsolete("This method is not be used, it will be removed in future versions")]
[EditorBrowsable(EditorBrowsableState.Never)]
protected virtual XDocument GetXDocument(string xPath, string type)
{
    //TODO: We need to get rid of this! This does not get called by our code
    if (type == IndexTypes.Content)
    {
        return this.SupportUnpublishedContent
            ? DataService.ContentService.GetLatestContentByXPath(xPath)
            : DataService.ContentService.GetPublishedContentByXPath(xPath);
    }

    if (type == IndexTypes.Media)
    {
        return DataService.MediaService.GetLatestMediaByXpath(xPath);
    }

    return null;
}
#endregion
/// <summary>
/// Fetches the nodes matching the xpath statement and adds them to the index. The whole
/// operation runs inside a <see cref="SafeCallContext"/>.
/// </summary>
/// <param name="xPath">The xpath statement used to fetch the nodes</param>
/// <param name="type">The index type of the nodes</param>
[Obsolete("This method is not be used, it will be removed in future versions")]
[EditorBrowsable(EditorBrowsableState.Never)]
private void AddNodesToIndex(string xPath, string type)
{
    using (new SafeCallContext())
    {
        // Get all the nodes matching the xpath statement
        var document = GetXDocument(xPath, type);
        if (document == null)
        {
            return;
        }

        var root = document.Root;
        if (root == null)
        {
            return;
        }

        //the result will either be a single doc with an id as the root, or it will
        // be multiple docs with a <nodes> wrapper, we need to check for this
        if (root.HasAttributes == false)
        {
            AddNodesToIndex(root.Elements(), type);
            return;
        }

        AddNodesToIndex(new[] { root }, type);
    }
}
}
}