using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Threading;
using System.Xml.Linq;
using Examine;
using Examine.LuceneEngine;
using Examine.Session;
using Lucene.Net.Documents;
using Umbraco.Core;
using Umbraco.Core.Cache;
using Umbraco.Core.Components;
using Umbraco.Core.Logging;
using Umbraco.Core.Models;
using Umbraco.Core.PropertyEditors;
using Umbraco.Core.Services;
using Umbraco.Core.Services.Changes;
using Umbraco.Core.Sync;
using Umbraco.Web.Cache;
using Umbraco.Web.Composing;
using Umbraco.Web.PropertyEditors;
using UmbracoExamine;

namespace Umbraco.Web.Search
{
    /// <summary>
    /// Configures and installs Examine.
    /// </summary>
    [RuntimeLevel(MinLevel = RuntimeLevel.Run)]
    public sealed class ExamineComponent : UmbracoComponentBase, IUmbracoCoreComponent
    {
        // NOTE(review): the generic type arguments in this file (OfType<...>, logger.Info<...>, ...)
        // were missing from the reviewed text and have been restored from how the results are used.
        // BaseUmbracoIndexer in particular is inferred (a common base exposing EnableDefaultEventHandler
        // and DocumentWriting) - confirm against UmbracoExamine before merging.

        /// <summary>
        /// Starts the boot-time index work on a separate thread, then binds the Examine indexers
        /// to the distributed cache refresher events.
        /// </summary>
        /// <param name="runtime">The runtime state (not used by this method).</param>
        /// <param name="propertyEditors">The property editors, searched for the grid property editor.</param>
        /// <param name="indexCollection">Accessor for the indexes the grid property editor gets bound to.</param>
        /// <param name="logger">The logger used for progress and failure messages.</param>
        public void Initialize(IRuntimeState runtime, PropertyEditorCollection propertyEditors, IExamineIndexCollectionAccessor indexCollection, ILogger logger)
        {
            logger.Info<ExamineComponent>("Starting initialize async background thread.");

            // make it async in order not to slow down the boot
            // fixme - should be a proper background task else we cannot stop it!
            var bg = new Thread(() =>
            {
                try
                {
                    // from WebRuntimeComponent
                    // rebuilds any empty indexes
                    RebuildIndexes(true);
                }
                catch (Exception e)
                {
                    logger.Error<ExamineComponent>("Failed to rebuild empty indexes.", e);
                }

                try
                {
                    // from PropertyEditorsComponent
                    var grid = propertyEditors.OfType<GridPropertyEditor>().FirstOrDefault();
                    if (grid != null)
                    {
                        BindGridToExamine(grid, indexCollection);
                    }
                }
                catch (Exception e)
                {
                    logger.Error<ExamineComponent>("Failed to bind grid property editor.", e);
                }
            })
            {
                // a foreground thread would keep the process alive until the rebuild completes;
                // until this becomes a proper, stoppable background task, at least do not block exit
                IsBackground = true
            };
            bg.Start();

            // the rest is the original Examine event handler

            logger.Info<ExamineComponent>("Initialize and bind to business logic events.");

            //TODO: For now we'll make this true, it means that indexes will be near real time
            // we'll see about what implications this may have - should be great in most scenarios
            DefaultExamineSession.RequireImmediateConsistency = true;

            var registeredProviders = ExamineManager.Instance.IndexProviderCollection
                .OfType<BaseUmbracoIndexer>()
                .Count(x => x.EnableDefaultEventHandler);

            logger.Info<ExamineComponent>($"Adding examine event handlers for {registeredProviders} index providers.");

            // don't bind event handlers if we're not supposed to listen
            if (registeredProviders == 0)
            {
                return;
            }

            // bind to distributed cache events - this ensures that this logic occurs on ALL servers
            // that are taking part in a load balanced environment.
            ContentCacheRefresher.CacheUpdated += ContentCacheRefresherUpdated;
            MediaCacheRefresher.CacheUpdated += MediaCacheRefresherUpdated;
            MemberCacheRefresher.CacheUpdated += MemberCacheRefresherUpdated;

            // fixme - content type?
            // events handling removed in ef013f9d3b945d0a48a306ff1afbd49c10c3fff8
            // because, could not make sense of it?

            // the internal content and member indexers also get the lower-cased node name field
            var contentIndexer = ExamineManager.Instance.IndexProviderCollection[Constants.Examine.InternalIndexer] as UmbracoContentIndexer;
            if (contentIndexer != null)
            {
                contentIndexer.DocumentWriting += IndexerDocumentWriting;
            }

            var memberIndexer = ExamineManager.Instance.IndexProviderCollection[Constants.Examine.InternalMemberIndexer] as UmbracoMemberIndexer;
            if (memberIndexer != null)
            {
                memberIndexer.DocumentWriting += IndexerDocumentWriting;
            }
        }

        /// <summary>
        /// Rebuilds the registered indexes.
        /// </summary>
        /// <param name="onlyEmptyIndexes">
        /// If true, only the indexes reporting <c>IsIndexNew()</c> are rebuilt; if false, all of them are.
        /// </param>
        private static void RebuildIndexes(bool onlyEmptyIndexes)
        {
            foreach (var indexer in ExamineManager.Instance.IndexProviders)
            {
                // evaluated lazily, right before the index would be rebuilt
                if (onlyEmptyIndexes && indexer.Value.IsIndexNew() == false)
                {
                    continue;
                }

                indexer.Value.RebuildIndex();
            }
        }

        /// <summary>
        /// Subscribes the grid property editor to the <c>DocumentWriting</c> event of the indexes.
        /// </summary>
        /// <param name="grid">The grid property editor whose handler gets subscribed.</param>
        /// <param name="indexCollection">Accessor for the indexes; nothing happens when it exposes none.</param>
        private static void BindGridToExamine(GridPropertyEditor grid, IExamineIndexCollectionAccessor indexCollection)
        {
            var indexes = indexCollection.Indexes;
            if (indexes == null)
            {
                return;
            }

            foreach (var i in indexes.Values.OfType<BaseUmbracoIndexer>())
            {
                i.DocumentWriting += grid.DocumentWriting;
            }
        }

        /// <summary>
        /// Handles member cache refreshes: re-indexes refreshed members and removes deleted ones.
        /// </summary>
        /// <param name="sender">The cache refresher raising the event.</param>
        /// <param name="args">The refresh message; by-id and by-instance messages are handled, all others are ignored.</param>
        private static void MemberCacheRefresherUpdated(MemberCacheRefresher sender, CacheRefresherEventArgs args)
        {
            switch (args.MessageType)
            {
                case MessageType.RefreshById:
                    var c1 = Current.Services.MemberService.GetById((int)args.MessageObject);
                    if (c1 != null)
                    {
                        ReIndexForMember(c1);
                    }
                    break;
                case MessageType.RemoveById:
                    // This is triggered when the item is permanently deleted
                    DeleteIndexForEntity((int)args.MessageObject, false);
                    break;
                case MessageType.RefreshByInstance:
                    var c3 = args.MessageObject as IMember;
                    if (c3 != null)
                    {
                        ReIndexForMember(c3);
                    }
                    break;
                case MessageType.RemoveByInstance:
                    // This is triggered when the item is permanently deleted
                    var c4 = args.MessageObject as IMember;
                    if (c4 != null)
                    {
                        DeleteIndexForEntity(c4.Id, false);
                    }
                    break;
                case MessageType.RefreshAll:
                case MessageType.RefreshByJson:
                default:
                    //We don't support these, these message types will not fire for unpublished content
                    break;
            }
        }

        /// <summary>
        /// Handles media cache refreshes: removes deleted or trashed media from the indexes and
        /// re-indexes refreshed media, including descendants for branch refreshes.
        /// </summary>
        /// <param name="sender">The cache refresher raising the event.</param>
        /// <param name="args">The refresh message, which must carry a payload array.</param>
        /// <exception cref="NotSupportedException">The message is not a refresh-by-payload message.</exception>
        private static void MediaCacheRefresherUpdated(MediaCacheRefresher sender, CacheRefresherEventArgs args)
        {
            if (args.MessageType != MessageType.RefreshByPayload)
            {
                throw new NotSupportedException($"Message type {args.MessageType} is not supported, only {MessageType.RefreshByPayload} is.");
            }

            var mediaService = Current.Services.MediaService;

            foreach (var payload in (MediaCacheRefresher.JsonPayload[]) args.MessageObject)
            {
                if (payload.ChangeTypes.HasType(TreeChangeTypes.Remove))
                {
                    // remove from *all* indexes
                    DeleteIndexForEntity(payload.Id, false);
                }
                else if (payload.ChangeTypes.HasType(TreeChangeTypes.RefreshAll))
                {
                    // ExamineEvents does not support RefreshAll
                    // just ignore that payload
                    // so what?!
                }
                else // RefreshNode or RefreshBranch (maybe trashed)
                {
                    var media = mediaService.GetById(payload.Id);
                    if (media == null || media.Trashed)
                    {
                        // gone fishing, remove entirely
                        DeleteIndexForEntity(payload.Id, false);
                        continue;
                    }

                    // just that media
                    ReIndexForMedia(media, media.Trashed == false);

                    // branch
                    if (payload.ChangeTypes.HasType(TreeChangeTypes.RefreshBranch))
                    {
                        var descendants = mediaService.GetDescendants(media);
                        foreach (var descendant in descendants)
                        {
                            ReIndexForMedia(descendant, descendant.Trashed == false);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Handles content cache refreshes: removes deleted or trashed content from the indexes and
        /// re-indexes the draft and published versions of refreshed content, including descendants
        /// for branch refreshes.
        /// </summary>
        /// <param name="sender">The cache refresher raising the event.</param>
        /// <param name="args">The refresh message, which must carry a payload array.</param>
        /// <exception cref="NotSupportedException">The message is not a refresh-by-payload message.</exception>
        private static void ContentCacheRefresherUpdated(ContentCacheRefresher sender, CacheRefresherEventArgs args)
        {
            if (args.MessageType != MessageType.RefreshByPayload)
            {
                throw new NotSupportedException($"Message type {args.MessageType} is not supported, only {MessageType.RefreshByPayload} is.");
            }

            var contentService = Current.Services.ContentService;

            foreach (var payload in (ContentCacheRefresher.JsonPayload[]) args.MessageObject)
            {
                if (payload.ChangeTypes.HasType(TreeChangeTypes.Remove))
                {
                    // delete content entirely (with descendants)
                    //  false: remove entirely from all indexes
                    DeleteIndexForEntity(payload.Id, false);
                }
                else if (payload.ChangeTypes.HasType(TreeChangeTypes.RefreshAll))
                {
                    // ExamineEvents does not support RefreshAll
                    // just ignore that payload
                    // so what?!
                }
                else // RefreshNode or RefreshBranch (maybe trashed)
                {
                    // don't try to be too clever - refresh entirely
                    // there has to be race conds in there ;-(

                    var content = contentService.GetById(payload.Id);
                    if (content == null || content.Trashed)
                    {
                        // gone fishing, remove entirely from all indexes (with descendants)
                        DeleteIndexForEntity(payload.Id, false);
                        continue;
                    }

                    // NOTE(review): the hard cast throws InvalidCastException if the registered
                    // content service is not a ContentService - confirm that cannot happen
                    IContent published = null;
                    if (content.HasPublishedVersion && ((ContentService)contentService).IsPathPublished(content))
                    {
                        published = content.Published
                            ? content
                            : contentService.GetByVersion(content.PublishedVersionGuid);
                    }

                    // just that content
                    ReIndexForContent(content, published);

                    // branch
                    if (payload.ChangeTypes.HasType(TreeChangeTypes.RefreshBranch))
                    {
                        // ids of the descendants that must not be indexed as published, either because
                        // they have no published version or because their parent is masked already;
                        // null when the branch root itself is not published: everything is masked
                        var masked = published == null ? null : new HashSet<int>();
                        var descendants = contentService.GetDescendants(content);
                        foreach (var descendant in descendants)
                        {
                            IContent publishedDescendant = null;
                            if (masked != null) // else everything is masked
                            {
                                if (masked.Contains(descendant.ParentId) || descendant.HasPublishedVersion == false)
                                {
                                    masked.Add(descendant.Id);
                                }
                                else
                                {
                                    publishedDescendant = descendant.Published
                                        ? descendant
                                        : contentService.GetByVersion(descendant.PublishedVersionGuid);
                                }
                            }

                            ReIndexForContent(descendant, publishedDescendant);
                        }
                    }
                }

                // NOTE
                //
                // DeleteIndexForEntity is handled by UmbracoContentIndexer.DeleteFromIndex() which takes
                //  care of also deleting the descendants
                //
                // ReIndexForContent is NOT taking care of descendants so we have to reload everything
                //  again in order to process the branch - we COULD improve that by just reloading the
                //  XML from database instead of reloading content & re-serializing!
            }
        }

        /// <summary>
        /// Re-indexes the draft and the published versions of a content item.
        /// </summary>
        /// <param name="content">The current (draft) version of the content.</param>
        /// <param name="published">The published version of the content, or null if it is to be treated as not published.</param>
        private static void ReIndexForContent(IContent content, IContent published)
        {
            if (published != null && content.Version == published.Version)
            {
                ReIndexForContent(content); // same = both
            }
            else
            {
                if (published == null)
                {
                    // remove 'published' - keep 'draft'
                    DeleteIndexForEntity(content.Id, true);
                }
                else
                {
                    // index 'published' - don't overwrite 'draft'
                    ReIndexForContent(published, false);
                }

                ReIndexForContent(content, true); // index 'draft'
            }
        }

        /// <summary>
        /// Re-indexes one version of a content item, serialized to xml with an extra <c>icon</c> attribute.
        /// </summary>
        /// <param name="sender">The content version to index.</param>
        /// <param name="supportUnpublished">
        /// If null, all content indexers are targeted; otherwise only those whose
        /// <c>SupportUnpublishedContent</c> equals the value. In every case only the indexers
        /// with <c>EnableDefaultEventHandler</c> set are targeted.
        /// </param>
        private static void ReIndexForContent(IContent sender, bool? supportUnpublished = null)
        {
            var xml = sender.ToXml();
            //add an icon attribute to get indexed
            xml.Add(new XAttribute("icon", sender.ContentType.Icon));

            ExamineManager.Instance.ReIndexNode(
                xml, IndexTypes.Content,
                ExamineManager.Instance.IndexProviderCollection.OfType<UmbracoContentIndexer>()
                    // only for the specified indexers
                    .Where(x => supportUnpublished.HasValue == false || supportUnpublished.Value == x.SupportUnpublishedContent)
                    .Where(x => x.EnableDefaultEventHandler));
        }

        /// <summary>
        /// Re-indexes a member in the indexers that have <c>EnableDefaultEventHandler</c> set.
        /// </summary>
        /// <param name="member">The member to index.</param>
        private static void ReIndexForMember(IMember member)
        {
            ExamineManager.Instance.ReIndexNode(
                member.ToXml(), IndexTypes.Member,
                ExamineManager.Instance.IndexProviderCollection.OfType<BaseUmbracoIndexer>()
                    //ensure that only the providers that are flagged to listen execute
                    .Where(x => x.EnableDefaultEventHandler));
        }

        /// <summary>
        /// Re-indexes a media item, serialized to xml with an extra <c>icon</c> attribute.
        /// </summary>
        /// <param name="sender">The media to index.</param>
        /// <param name="isMediaPublished">
        /// If true, the media is indexed in all content indexers; if false, only in those
        /// supporting unpublished content.
        /// </param>
        private static void ReIndexForMedia(IMedia sender, bool isMediaPublished)
        {
            var xml = sender.ToXml();
            //add an icon attribute to get indexed
            xml.Add(new XAttribute("icon", sender.ContentType.Icon));

            ExamineManager.Instance.ReIndexNode(
                xml, IndexTypes.Media,
                ExamineManager.Instance.IndexProviderCollection.OfType<UmbracoContentIndexer>()
                    // index this item for all indexers if the media is not trashed, otherwise if the item is trashed
                    // then only index this for indexers supporting unpublished media
                    .Where(x => isMediaPublished || (x.SupportUnpublishedContent))
                    .Where(x => x.EnableDefaultEventHandler));
        }

        /// <summary>
        /// Remove items from any index that doesn't support unpublished content
        /// </summary>
        /// <param name="entityId">The identifier of the entity to remove.</param>
        /// <param name="keepIfUnpublished">
        /// If true, indicates that we will only delete this item from indexes that don't support unpublished content.
        /// If false it will delete this from all indexes regardless.
        /// </param>
        private static void DeleteIndexForEntity(int entityId, bool keepIfUnpublished)
        {
            ExamineManager.Instance.DeleteFromIndex(
                entityId.ToString(CultureInfo.InvariantCulture),
                ExamineManager.Instance.IndexProviderCollection.OfType<BaseUmbracoIndexer>()
                    // if keepIfUnpublished == true then only delete this item from indexes not supporting unpublished content,
                    // otherwise if keepIfUnpublished == false then remove from all indexes
                    // (an indexer that is not an UmbracoContentIndexer yields null here, and null == false is false)
                    .Where(x => keepIfUnpublished == false || (x as UmbracoContentIndexer)?.SupportUnpublishedContent == false)
                    .Where(x => x.EnableDefaultEventHandler));
        }

        /// <summary>
        /// Event handler to create a lower cased version of the node name, this is so we can support case-insensitive searching and still
        /// use the Whitespace Analyzer
        /// </summary>
        /// <param name="sender">The indexer raising the event.</param>
        /// <param name="e">The event arguments, carrying the fields being indexed and the document being written.</param>
        private static void IndexerDocumentWriting(object sender, DocumentWritingEventArgs e)
        {
            if (e.Fields.Keys.Contains("nodeName"))
            {
                //TODO: This logic should really be put into the content indexer instead of hidden here!!

                // NOTE(review): ToLower() uses the current culture - confirm this matches how
                // search terms get lower-cased before changing it to ToLowerInvariant()

                //add the lower cased version
                e.Document.Add(new Field("__nodeName",
                    e.Fields["nodeName"].ToLower(),
                    Field.Store.YES,
                    Field.Index.ANALYZED,
                    Field.TermVector.NO
                ));
            }
        }
    }
}