2017-07-20 11:21:28 +02:00
using System ;
2018-03-28 16:12:43 +11:00
using System.Collections ;
2016-09-08 18:43:58 +02:00
using System.Collections.Generic ;
using System.Globalization ;
using System.Linq ;
2016-10-18 19:16:46 +02:00
using System.Threading ;
2016-09-08 18:43:58 +02:00
using System.Xml.Linq ;
using Examine ;
using Examine.LuceneEngine ;
2018-03-28 16:12:43 +11:00
using Examine.LuceneEngine.Providers ;
2016-09-08 18:43:58 +02:00
using Lucene.Net.Documents ;
2018-03-28 16:12:43 +11:00
using Lucene.Net.Index ;
2016-09-08 18:43:58 +02:00
using Umbraco.Core ;
using Umbraco.Core.Cache ;
using Umbraco.Core.Components ;
using Umbraco.Core.Logging ;
using Umbraco.Core.Models ;
2016-10-18 19:16:46 +02:00
using Umbraco.Core.PropertyEditors ;
2016-09-08 18:43:58 +02:00
using Umbraco.Core.Services.Changes ;
2017-12-28 09:18:09 +01:00
using Umbraco.Core.Services.Implement ;
2016-09-08 18:43:58 +02:00
using Umbraco.Core.Sync ;
using Umbraco.Web.Cache ;
2017-05-30 18:13:11 +02:00
using Umbraco.Web.Composing ;
2016-10-18 19:16:46 +02:00
using Umbraco.Web.PropertyEditors ;
2017-07-27 12:01:38 +02:00
using Umbraco.Examine ;
2016-09-08 18:43:58 +02:00
namespace Umbraco.Web.Search
{
2016-10-07 14:34:55 +02:00
/// <summary>
2016-10-18 19:16:46 +02:00
/// Configures and installs Examine.
2016-10-07 14:34:55 +02:00
/// </summary>
[RuntimeLevel(MinLevel = RuntimeLevel.Run)]
public sealed class ExamineComponent : UmbracoComponentBase , IUmbracoCoreComponent
2017-07-20 11:21:28 +02:00
{
2018-03-28 16:12:43 +11:00
//fixme - we are injecting this which is nice, but we still use ExamineManager everywhere, we could instead interface IExamineManager?
private IExamineIndexCollectionAccessor _indexCollection ;
private static bool _disableExamineIndexing = false ;
private static volatile bool __isConfigured = false ;
private static readonly object IsConfiguredLocker = new object ( ) ;
/// <summary>
/// Boots Examine for this application: takes over Examine's shutdown handling through MainDom,
/// swaps the default lock factory, and - when this appdomain is the MainDom and at least one indexer
/// listens to events - binds the cache refresher events and kicks off the startup rebuild.
/// </summary>
/// <param name="runtime">The runtime state; must be the concrete <c>RuntimeState</c> so that MainDom can be reached.</param>
/// <param name="propertyEditors">The property editors, searched for the grid editor to bind to the indexers.</param>
/// <param name="indexCollection">Accessor for the configured Examine indexes; stored for later use by the event handlers.</param>
/// <param name="profilingLogger">Used for logging and for timing the Examine shutdown.</param>
/// <exception cref="NotSupportedException">Thrown when <paramref name="runtime"/> is not a <c>RuntimeState</c>.</exception>
public void Initialize(IRuntimeState runtime, PropertyEditorCollection propertyEditors, IExamineIndexCollectionAccessor indexCollection, ProfilingLogger profilingLogger)
{
    _indexCollection = indexCollection;

    // fixme - MainDom is internal and cannot be injected, so the only way to reach it is through the
    // concrete runtime state (resolving it straight from the container would not be any nicer)
    var coreRuntime = runtime as RuntimeState;
    if (coreRuntime == null)
        throw new NotSupportedException($"Unsupported IRuntimeState implementation {runtime.GetType().FullName}, expecting {typeof(RuntimeState).FullName}.");

    // Examine must not register its own appdomain shutdown handling: MainDom drives the shutdown below
    ExamineManager.DisableDefaultHostingEnvironmentRegistration();

    // replace the default native file-system lock (which may stay held when the appdomain terminates and,
    // in rare cases, only be released by forcefully terminating IIS) with a simple lock that merely
    // checks for the existence of the lock file
    DirectoryTracker.DefaultLockFactory = d => new NoPrefixSimpleFsLockFactory(d);

    // hand Examine's shutdown over to MainDom
    var isMainDom = coreRuntime.MainDom.Register(() =>
    {
        using (profilingLogger.TraceDuration<ExamineComponent>("Examine shutting down"))
        {
            // Examine's own IRegisteredObject expects to be stopped first with immediate=false and then
            // with immediate=true; replaying that sequence lets it run its own subroutines
            ExamineManager.Instance.Stop(false);
            ExamineManager.Instance.Stop(true);
        }
    });

    var logger = profilingLogger.Logger;

    if (!isMainDom)
    {
        logger.Debug<ExamineComponent>("Examine shutdown not registered, this appdomain is not the MainDom, Examine will be disabled");

        // failing to register means this appdomain is not the MainDom: all of Examine gets disabled
        Suspendable.ExamineEvents.SuspendIndexers();
        _disableExamineIndexing = true;
        return; // nothing else to set up
    }

    logger.Debug<ExamineComponent>("Examine shutdown registered with MainDom");

    var listeningIndexerCount = indexCollection.Indexes.Values
        .OfType<UmbracoExamineIndexer>()
        .Count(indexer => indexer.EnableDefaultEventHandler);

    logger.Info<ExamineComponent>($"Adding examine event handlers for {listeningIndexerCount} index providers.");

    // no indexer wants the default event handling: do not bind anything
    if (listeningIndexerCount == 0)
        return;

    BindGridToExamine(logger, indexCollection, propertyEditors);

    // bind to the distributed cache events so that this logic runs on ALL servers
    // taking part in a load balanced environment
    ContentCacheRefresher.CacheUpdated += ContentCacheRefresherUpdated;
    MediaCacheRefresher.CacheUpdated += MediaCacheRefresherUpdated;
    MemberCacheRefresher.CacheUpdated += MemberCacheRefresherUpdated;

    // fixme - content type?
    // events handling removed in ef013f9d3b945d0a48a306ff1afbd49c10c3fff8
    // because, could not make sense of it?

    EnsureUnlocked(logger, indexCollection);
    RebuildIndexesOnStartup(logger);
}
2016-09-08 18:43:58 +02:00
2018-03-28 16:12:43 +11:00
/// <summary>
/// Rebuilds the empty indexes on a separate thread so that the boot sequence is not slowed down.
/// </summary>
/// <param name="logger">Receives the start notification and any failure raised by the rebuild.</param>
private void RebuildIndexesOnStartup(ILogger logger)
{
    //TODO: need a way to disable rebuilding on startup

    logger.Info<ExamineComponent>("Starting initialize async background thread.");

    // fixme - should be a proper background task else we cannot stop it!
    var rebuildThread = new Thread(() =>
    {
        try
        {
            // true: only the indexes that are still empty get rebuilt
            RebuildIndexes(true, _indexCollection, logger);
        }
        catch (Exception e)
        {
            // nothing observes this thread, so the failure is logged rather than left unhandled
            logger.Error<ExamineComponent>("Failed to rebuild empty indexes.", e);
        }
    })
    {
        // a foreground thread would keep the process alive until the rebuild finishes;
        // as a background thread it cannot hold up process termination
        IsBackground = true
    };

    rebuildThread.Start();
}
/// <summary>
/// Rebuilds indexes, either on startup or on a cold boot.
/// </summary>
/// <param name="onlyEmptyIndexes">
/// When true only the indexes reporting themselves as new are rebuilt, otherwise every index is rebuilt
/// through the Examine manager.
/// </param>
/// <param name="indexCollection">Accessor for the configured indexes.</param>
/// <param name="logger">Logger handed to the unlock step.</param>
internal static void RebuildIndexes(bool onlyEmptyIndexes, IExamineIndexCollectionAccessor indexCollection, ILogger logger)
{
    // indexing is disabled when this appdomain is not the MainDom; this method can still be reached
    // during a cold boot, so bail out here
    if (_disableExamineIndexing)
        return;

    EnsureUnlocked(logger, indexCollection);

    if (!onlyEmptyIndexes)
    {
        // rebuild everything
        ExamineManager.Instance.RebuildIndexes();
        return;
    }

    var emptyIndexes = indexCollection.Indexes.Values.Where(index => index.IsIndexNew());
    foreach (var emptyIndex in emptyIndexes)
        emptyIndex.RebuildIndex();
}
2018-03-28 16:12:43 +11:00
/// <summary>
/// Must be called to ensure each index is unlocked before any indexing occurs.
/// </summary>
/// <remarks>
/// Index rebuilding can occur on a normal boot if the indexes are empty, or on a cold boot by the database
/// server messenger. Before either of these happens, the indexes need to be configured. The configuration
/// pass runs at most once: later calls return immediately.
/// </remarks>
/// <param name="logger">Receives a message for every index that had to be forcibly unlocked.</param>
/// <param name="indexCollection">Accessor for the configured indexes; only the Lucene indexers are processed.</param>
private static void EnsureUnlocked(ILogger logger, IExamineIndexCollectionAccessor indexCollection)
{
    if (_disableExamineIndexing) return;
    if (__isConfigured) return;

    lock (IsConfiguredLocker)
    {
        // check again now that the lock is held
        if (__isConfigured) return;

        try
        {
            foreach (var luceneIndexer in indexCollection.Indexes.Values.OfType<LuceneIndexer>())
            {
                // Do not wait for pending indexing operations when shutting down: waiting could halt the
                // shutdown for a very long time, causing overlapping appdomains and many other problems.
                luceneIndexer.WaitForIndexQueueOnShutdown = false;

                // The index should not be locked: a simple fs lock is used and indexes are only operational
                // when this appdomain is the MainDom. If a lock was left behind anyway, force it open.
                var dir = luceneIndexer.GetLuceneDirectory();
                if (IndexWriter.IsLocked(dir))
                {
                    logger.Info<ExamineComponent>($"Forcing index {luceneIndexer.Name} to be unlocked since it was left in a locked state");
                    IndexWriter.Unlock(dir);
                }
            }
        }
        finally
        {
            // Raise the flag only once the pass is over: callers that read it outside the lock must not
            // skip ahead while indexes may still be locked. Setting it in finally keeps the pass
            // at-most-once even when it throws.
            __isConfigured = true;
        }
    }
}
2018-03-28 16:12:43 +11:00
/// <summary>
/// Subscribes the grid property editor's document-writing handler to every Umbraco Examine indexer.
/// </summary>
/// <remarks>
/// This is a hack until http://issues.umbraco.org/issue/U4-8437 is dealt with. Any failure is logged
/// and swallowed so that it does not propagate to the caller.
/// </remarks>
/// <param name="logger">Receives the error when binding fails.</param>
/// <param name="indexCollection">Accessor for the configured indexes.</param>
/// <param name="propertyEditors">The property editors to search for a grid editor.</param>
private static void BindGridToExamine(ILogger logger, IExamineIndexCollectionAccessor indexCollection, IEnumerable propertyEditors)
{
    try
    {
        var gridEditor = propertyEditors.OfType<GridPropertyEditor>().FirstOrDefault();
        if (gridEditor == null)
            return; // no grid editor, nothing to bind

        var indexers = indexCollection.Indexes.Values.OfType<UmbracoExamineIndexer>();
        foreach (var indexer in indexers)
        {
            indexer.DocumentWriting += gridEditor.DocumentWriting;
        }
    }
    catch (Exception e)
    {
        logger.Error<ExamineComponent>("Failed to bind grid property editor.", e);
    }
}
/// <summary>
/// Handles member cache refresher notifications by re-indexing or removing the affected member.
/// </summary>
/// <param name="sender">The refresher raising the event (not used).</param>
/// <param name="args">Carries the message type and the message object (a member id or an <c>IMember</c>).</param>
private void MemberCacheRefresherUpdated(MemberCacheRefresher sender, CacheRefresherEventArgs args)
{
    if (Suspendable.ExamineEvents.CanIndex == false)
        return;

    switch (args.MessageType)
    {
        case MessageType.RefreshById:
        {
            var member = Current.Services.MemberService.GetById((int) args.MessageObject);
            if (member != null)
                ReIndexForMember(member);
            break;
        }

        case MessageType.RemoveById:
        {
            // raised when the item is permanently deleted
            DeleteIndexForEntity((int) args.MessageObject, false);
            break;
        }

        case MessageType.RefreshByInstance:
        {
            if (args.MessageObject is IMember refreshed)
                ReIndexForMember(refreshed);
            break;
        }

        case MessageType.RemoveByInstance:
        {
            // raised when the item is permanently deleted
            if (args.MessageObject is IMember removed)
                DeleteIndexForEntity(removed.Id, false);
            break;
        }

        case MessageType.RefreshAll:
        case MessageType.RefreshByJson:
        default:
            // We don't support these, these message types will not fire for unpublished content
            break;
    }
}
2016-09-08 18:43:58 +02:00
2018-03-28 16:12:43 +11:00
/// <summary>
/// Handles media cache refresher notifications: removes deleted or trashed media from the indexes and
/// re-indexes refreshed media, including descendants for branch refreshes.
/// </summary>
/// <param name="sender">The refresher raising the event (not used).</param>
/// <param name="args">Carries the message type and an array of <c>MediaCacheRefresher.JsonPayload</c>.</param>
/// <exception cref="NotSupportedException">Thrown when the message type is not <c>RefreshByPayload</c>.</exception>
private void MediaCacheRefresherUpdated(MediaCacheRefresher sender, CacheRefresherEventArgs args)
{
    if (Suspendable.ExamineEvents.CanIndex == false)
        return;

    if (args.MessageType != MessageType.RefreshByPayload)
        throw new NotSupportedException();

    var mediaService = Current.Services.MediaService;
    var payloads = (MediaCacheRefresher.JsonPayload[]) args.MessageObject;

    foreach (var payload in payloads)
    {
        if (payload.ChangeTypes.HasType(TreeChangeTypes.Remove))
        {
            // removed: drop it from *all* indexes
            DeleteIndexForEntity(payload.Id, false);
            continue;
        }

        if (payload.ChangeTypes.HasType(TreeChangeTypes.RefreshAll))
        {
            // RefreshAll is not supported here: that payload is simply ignored
            continue;
        }

        // from here on: RefreshNode or RefreshBranch (possibly trashed)
        var media = mediaService.GetById(payload.Id);
        if (media == null || media.Trashed)
        {
            // no longer there, or in the recycle bin: remove entirely
            DeleteIndexForEntity(payload.Id, false);
            continue;
        }

        // the media item itself
        ReIndexForMedia(media, !media.Trashed);

        if (!payload.ChangeTypes.HasType(TreeChangeTypes.RefreshBranch))
            continue;

        // the whole branch below it
        foreach (var descendant in mediaService.GetDescendants(media))
        {
            ReIndexForMedia(descendant, !descendant.Trashed);
        }
    }
}
2018-03-28 16:12:43 +11:00
/// <summary>
/// Handles content cache refresher notifications: removes deleted or trashed content from the indexes and
/// re-indexes refreshed content, including descendants for branch refreshes.
/// </summary>
/// <remarks>
/// DeleteIndexForEntity is handled by UmbracoContentIndexer.DeleteFromIndex() which takes care of also
/// deleting the descendants. ReIndexForContent is NOT taking care of descendants, so everything has to be
/// reloaded in order to process the branch - this COULD be improved by just reloading the XML from the
/// database instead of reloading content and re-serializing.
/// </remarks>
/// <param name="sender">The refresher raising the event (not used).</param>
/// <param name="args">Carries the message type and an array of <c>ContentCacheRefresher.JsonPayload</c>.</param>
/// <exception cref="NotSupportedException">Thrown when the message type is not <c>RefreshByPayload</c>.</exception>
private void ContentCacheRefresherUpdated(ContentCacheRefresher sender, CacheRefresherEventArgs args)
{
    if (Suspendable.ExamineEvents.CanIndex == false)
        return;

    if (args.MessageType != MessageType.RefreshByPayload)
        throw new NotSupportedException();

    var contentService = Current.Services.ContentService;

    foreach (var payload in (ContentCacheRefresher.JsonPayload[]) args.MessageObject)
    {
        if (payload.ChangeTypes.HasType(TreeChangeTypes.Remove))
        {
            // delete content entirely (with descendants)
            // false: remove entirely from all indexes
            DeleteIndexForEntity(payload.Id, false);
            continue;
        }

        if (payload.ChangeTypes.HasType(TreeChangeTypes.RefreshAll))
        {
            // RefreshAll is not supported here: that payload is simply ignored
            continue;
        }

        // RefreshNode or RefreshBranch (maybe trashed)
        // don't try to be too clever - refresh entirely
        // there has to be race conds in there ;-(
        var content = contentService.GetById(payload.Id);
        if (content == null || content.Trashed)
        {
            // gone, remove entirely from all indexes (with descendants)
            DeleteIndexForEntity(payload.Id, false);
            continue;
        }

        // the content counts as published only when its whole path is published
        // NOTE(review): the cast assumes the registered service is the concrete ContentService - confirm
        IContent published = null;
        if (content.Published && ((ContentService) contentService).IsPathPublished(content))
            published = content;

        // just that content
        ReIndexForContent(content, published);

        if (!payload.ChangeTypes.HasType(TreeChangeTypes.RefreshBranch))
            continue;

        // branch: 'masked' collects the ids of descendants hidden by an unpublished ancestor (or unpublished
        // themselves); null means the branch root is not published, so every descendant is masked.
        // A set keeps the per-descendant parent lookup constant-time.
        // NOTE(review): relies on GetDescendants returning parents before their children - confirm
        var masked = published == null ? null : new HashSet<int>();
        foreach (var descendant in contentService.GetDescendants(content))
        {
            IContent publishedDescendant = null;
            if (masked != null)
            {
                if (masked.Contains(descendant.ParentId) || !descendant.Published)
                    masked.Add(descendant.Id);
                else
                    publishedDescendant = descendant;
            }

            ReIndexForContent(descendant, publishedDescendant);
        }
    }
}
2016-09-08 18:43:58 +02:00
2018-03-28 16:12:43 +11:00
/// <summary>
/// Re-indexes a content item given its draft and, when there is one, its published counterpart.
/// </summary>
/// <param name="content">The content item, always indexed as the 'draft'.</param>
/// <param name="published">The published item, or null when the content is not published.</param>
private void ReIndexForContent(IContent content, IContent published)
{
    var sameVersion = published != null && content.VersionId == published.VersionId;
    if (sameVersion)
    {
        // draft and published are the same version: one pass without an indexer filter covers both
        ReIndexForContent(content);
        return;
    }

    if (published != null)
    {
        // index 'published' - don't overwrite 'draft'
        ReIndexForContent(published, false);
    }
    else
    {
        // remove 'published' - keep 'draft'
        DeleteIndexForEntity(content.Id, true);
    }

    // index 'draft'
    ReIndexForContent(content, true);
}
2018-03-28 16:12:43 +11:00
/// <summary>
/// Indexes a content item into the content indexers that have the default event handler enabled.
/// </summary>
/// <param name="sender">The content item to index.</param>
/// <param name="supportUnpublished">
/// When null every such indexer is targeted; otherwise only the indexers whose
/// <c>SupportUnpublishedContent</c> equals this value.
/// </param>
private void ReIndexForContent(IContent sender, bool? supportUnpublished = null)
{
    var valueSets = UmbracoContentIndexer
        .GetValueSets(Current.UrlSegmentProviders, Current.Services.UserService, sender)
        .ToArray();

    // only the requested kind of indexers, and only those listening to events
    var targetIndexers = _indexCollection.Indexes.Values
        .OfType<UmbracoContentIndexer>()
        .Where(indexer =>
            (!supportUnpublished.HasValue || supportUnpublished.Value == indexer.SupportUnpublishedContent)
            && indexer.EnableDefaultEventHandler);

    ExamineManager.Instance.IndexItems(valueSets, targetIndexers);
}
2018-03-28 16:12:43 +11:00
/// <summary>
/// Indexes a member into every Umbraco Examine indexer that has the default event handler enabled.
/// </summary>
/// <param name="member">The member to index.</param>
private void ReIndexForMember(IMember member)
{
    var valueSets = UmbracoMemberIndexer.GetValueSets(member).ToArray();

    // only the providers flagged to listen to events take part
    var listeningIndexers = _indexCollection.Indexes.Values
        .OfType<UmbracoExamineIndexer>()
        .Where(indexer => indexer.EnableDefaultEventHandler);

    ExamineManager.Instance.IndexItems(valueSets, listeningIndexers);
}
2016-09-08 18:43:58 +02:00
2018-03-28 16:12:43 +11:00
/// <summary>
/// Indexes a media item into the content indexers that have the default event handler enabled.
/// </summary>
/// <param name="sender">The media item to index.</param>
/// <param name="isMediaPublished">
/// True when the media is not trashed, in which case every such indexer is targeted; otherwise only the
/// indexers supporting unpublished content are.
/// </param>
private void ReIndexForMedia(IMedia sender, bool isMediaPublished)
{
    var valueSets = UmbracoContentIndexer
        .GetValueSets(Current.UrlSegmentProviders, Current.Services.UserService, sender)
        .ToArray();

    // trashed media only goes to the indexers supporting unpublished content
    var targetIndexers = _indexCollection.Indexes.Values
        .OfType<UmbracoContentIndexer>()
        .Where(indexer =>
            (isMediaPublished || indexer.SupportUnpublishedContent)
            && indexer.EnableDefaultEventHandler);

    ExamineManager.Instance.IndexItems(valueSets, targetIndexers);
}
/// <summary>
/// Removes an entity from the indexes that have the default event handler enabled: either from all of
/// them, or only from the content indexes that do not support unpublished content.
/// </summary>
/// <param name="entityId">The identifier of the entity to remove.</param>
/// <param name="keepIfUnpublished">
/// If true, indicates that we will only delete this item from indexes that don't support unpublished content.
/// If false it will delete this from all indexes regardless.
/// </param>
private void DeleteIndexForEntity(int entityId, bool keepIfUnpublished)
{
    var targetIndexers = _indexCollection.Indexes.Values
        .OfType<UmbracoExamineIndexer>()
        // if keepIfUnpublished == true then only delete this item from indexes not supporting unpublished content,
        // otherwise if keepIfUnpublished == false then remove from all indexes
        .Where(x => keepIfUnpublished == false
                    || (x is UmbracoContentIndexer contentIndexer && contentIndexer.SupportUnpublishedContent == false))
        .Where(x => x.EnableDefaultEventHandler);

    // the id is formatted with the invariant culture so that it does not vary with the thread's culture
    ExamineManager.Instance.DeleteFromIndexes(entityId.ToString(CultureInfo.InvariantCulture), targetIndexers);
}
2017-07-20 11:21:28 +02:00
}
}