Copied to clipboard

Flag this post as spam?

This post will be reported to the moderators as potential spam to be looked at


  • Stefan Stankovic 18 posts 165 karma points
    Jan 15, 2024 @ 12:26
    Stefan Stankovic
    0

    Umbraco Examine Highlight result

    I'm looking for a way to highlight matched terms in Examine search results. I've already checked some solutions:

    https://our.umbraco.com/forum/developers/api-questions/44447-Highlight-text-in-examine-search-results-when-matching-beginning-of-words

    https://our.umbraco.com/forum/developers/extending-umbraco/19329-Search-multiple-fields-for-multiple-terms-with-examine?p=0

    https://our.umbraco.com/forum/using-umbraco-and-getting-started/90477-umbraco-examine-search-result-highlighting

    https://our.umbraco.com/forum/developers/extending-umbraco/13571-Umbraco-Examine-Search-Results-Highlighting

    But there are inconsistencies between the latest Examine version and Lucene.Net.Contrib. The latest Examine version uses Lucene version 4.8.0-beta00016, but Lucene.Net.Contrib uses Lucene version 3.0.3, so if I try to call GetBestFragments I get the following exception: 'Could not load type 'Lucene.Net.Analysis.Tokenattributes.ITermAttribute' from assembly 'Lucene.Net, Version=4.0.0.0, Culture=neutral, PublicKeyToken=85089178b9ac3181'.'

    I've checked the Lucene DLL and the required attribute type is not there: enter image description here

    My code:

    /// <summary>
    /// Builds an HTML snippet of the best matching fragments of a text, with matched terms
    /// wrapped in a <c>span</c> carrying the <c>umbSearchHighlight</c> CSS class.
    /// </summary>
    /// <param name="IndexField">The text to analyse and highlight.</param>
    /// <param name="reader">Index reader handed to <c>FragmentScorer</c> (defined outside this snippet).</param>
    /// <param name="searchQuery">The query text handed to <c>FragmentScorer</c>.</param>
    /// <param name="highlightField">The field name used when tokenising the text.</param>
    /// <returns>Up to three fragments joined by "..." and followed by "...", or an empty string when nothing matched.</returns>
    public static string GetHighlight(string IndexField, IndexReader reader, string searchQuery, string highlightField)
    {
        // Formatter first, then the scorer, so construction order matches the original snippet.
        var htmlFormatter = new SimpleHTMLFormatter("<span class=\"umbSearchHighlight\">", "</span>");
        var scorer = FragmentScorer(searchQuery, highlightField, reader);
        var fragmentHighlighter = new Highlighter(htmlFormatter, scorer);

        var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion);
        var tokens = analyzer.GetTokenStream(highlightField, new StringReader(IndexField));

        string fragments = fragmentHighlighter.GetBestFragments(tokens, IndexField, 3, "...");

        // Only append the trailing ellipsis when at least one fragment was produced.
        return fragments.Length > 0 ? fragments + "..." : string.Empty;
    }
    

    Search function:

        // TryGetIndex reports via its return value whether the index exists; the out value must not be
        // dereferenced when it returns false, so both checks are combined into one guard.
        if (_examineManager.TryGetIndex(UmbracoConstants.UmbracoIndexes.ExternalIndexName, out IIndex externalIndex)
            && externalIndex.Searcher is LuceneSearcher luceneSearcher)
        {
            // NOTE(review): the first argument is the text that gets highlighted; passing the literal
            // "nodeName" highlights the field name itself rather than a stored value - confirm intent.
            var highlight = ExamineHelper.GetHighlight(
                "nodeName",
                luceneSearcher.GetSearchContext().GetSearcher().IndexSearcher.IndexReader,
                "nodeName:term",
                "nodeName");
        }
    
  • Jeroen Breuer 4909 posts 12266 karma points MVP 5x admin c-trib
    23 days ago
    Jeroen Breuer
    0

    Hi Stefan,

    Did you ever find a solution for Examine highlighting?

    Jeroen

  • Stefan Stankovic 18 posts 165 karma points
    22 days ago
    Stefan Stankovic
    0

    Hi Jeroen

    Unfortunately, I haven't found the solution so I fetched all the data and then did some Regex search to highlight the response.

  • Bjarne Fyrstenborg 1286 posts 4060 karma points MVP 8x c-trib
    21 days ago
    Bjarne Fyrstenborg
    100

    I have used this in some projects:

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.QueryParsers.Classic;
    using Lucene.Net.Search;
    using Lucene.Net.Search.Highlight;
    using System.Text.RegularExpressions;
    using Umbraco.Extensions;
    
    namespace My.Project;
    
    using Version = Lucene.Net.Util.LuceneVersion;
    
    /// <summary>
    /// Produces highlighted text fragments for a Lucene query by wrapping matched terms
    /// in a configurable pair of HTML tags.
    /// </summary>
    public class LuceneHighlighter
    {
        private readonly Version _luceneVersion = Version.LUCENE_48;

        /// <summary>
        /// Query parsers keyed by field name; starts empty and is filled lazily by <see cref="GetQueryParser"/>.
        /// </summary>
        protected Dictionary<string, QueryParser> QueryParsers = new();

        /// <summary>
        /// Get or set the separator string (default = "...")
        /// </summary>
        public string Separator { get; set; }

        /// <summary>
        /// Get or set the maximum number of highlights to show (default = 5)
        /// </summary>
        public int MaxNumHighlights { get; set; }

        /// <summary>
        /// Get or set the Formatter to use (default = SimpleHTMLFormatter)
        /// </summary>
        public IFormatter HighlightFormatter { get; set; }

        /// <summary>
        /// Get or set the Analyzer to use (default = StandardAnalyzer)
        /// </summary>
        public Analyzer HighlightAnalyzer { get; set; }

        /// <summary>
        /// Get the index searcher being used
        /// </summary>
        public IndexSearcher Searcher { get; private set; }

        /// <summary>
        /// Get the Query to be used for highlighting
        /// </summary>
        public Lucene.Net.Search.Query LuceneQuery { get; private set; }

        /// <summary>
        /// Initialise a new LuceneHighlighter instance
        /// </summary>
        /// <param name="searcher">The IndexSearcher being used</param>
        /// <param name="luceneQuery">The underlying Lucene Query being used</param>
        /// <param name="highlightCssClassName">The name of the CSS class added to the opening tag; when null or blank no class attribute is added</param>
        /// <param name="preTag">The opening tag placed before each highlighted term, e.g. "&lt;mark&gt;"</param>
        /// <param name="postTag">The closing tag placed after each highlighted term, e.g. "&lt;/mark&gt;"</param>
        public LuceneHighlighter(IndexSearcher searcher, Lucene.Net.Search.Query luceneQuery, string highlightCssClassName, string preTag, string postTag)
        {
            this.Searcher = searcher;
            this.LuceneQuery = luceneQuery;
            this.Separator = "...";
            this.MaxNumHighlights = 5;
            this.HighlightAnalyzer = new StandardAnalyzer(_luceneVersion);
            this.HighlightFormatter = new SimpleHTMLFormatter(BuildOpeningTag(preTag, highlightCssClassName), postTag);
        }

        /// <summary>
        /// Get the highlighted string for a value and a field
        /// </summary>
        /// <param name="value">The field value</param>
        /// <param name="highlightField">The field name</param>
        /// <param name="fragmentSize">Size in number of characters of each fragment</param>
        /// <returns>A string containing the highlighted result; empty when <paramref name="value"/> is null or empty</returns>
        public string GetHighlight(string value, string highlightField, int fragmentSize = 100)
        {
            if (string.IsNullOrEmpty(value))
            {
                return string.Empty;
            }

            value = StripContentArtifact(value);

            Highlighter highlighter = CreateHighlighter(Searcher, LuceneQuery, fragmentSize);
            TokenStream tokenStream = HighlightAnalyzer.GetTokenStream(highlightField, new StringReader(value));
            return highlighter.GetBestFragments(tokenStream, value, MaxNumHighlights, Separator);
        }

        /// <summary>
        /// Get the highlighted strings for a value and a field
        /// </summary>
        /// <param name="value">The field value</param>
        /// <param name="highlightField">The field name</param>
        /// <param name="fragmentSize">Size in number of characters of each fragment</param>
        /// <returns>A string array containing the highlighted results; empty when <paramref name="value"/> is null or empty</returns>
        public string[] GetHighlights(string value, string highlightField, int fragmentSize = 100)
        {
            if (string.IsNullOrEmpty(value))
            {
                return Array.Empty<string>();
            }

            value = StripContentArtifact(value);

            Highlighter highlighter = CreateHighlighter(Searcher, LuceneQuery, fragmentSize);
            TokenStream tokenStream = HighlightAnalyzer.GetTokenStream(highlightField, new StringReader(value));
            return highlighter.GetBestFragments(tokenStream, value, MaxNumHighlights);
        }

        /// <summary>
        /// Get the highlighted field for a value and field, using an explicitly supplied searcher and query
        /// instead of the ones passed to the constructor
        /// </summary>
        /// <param name="value">The field value</param>
        /// <param name="searcher">The Lucene index searcher</param>
        /// <param name="highlightField">The highlight field</param>
        /// <param name="luceneQuery">The query being used</param>
        /// <param name="fragmentSize">Size in number of characters of each fragment</param>
        /// <returns>A string containing the highlighted result; empty when <paramref name="value"/> is null or empty</returns>
        public string GetHighlight(string value, IndexSearcher searcher, string highlightField, Lucene.Net.Search.Query luceneQuery, int fragmentSize = 100)
        {
            if (string.IsNullOrEmpty(value))
            {
                return string.Empty;
            }

            // Unlike the other overloads, this one has never stripped "content" from the value; kept as-is.
            Highlighter highlighter = CreateHighlighter(searcher, luceneQuery, fragmentSize);
            TokenStream tokenStream = HighlightAnalyzer.GetTokenStream(highlightField, new StringReader(value));
            return highlighter.GetBestFragments(tokenStream, value, MaxNumHighlights, Separator);
        }

        /// <summary>
        /// Gets a query parser for a highlight field, creating and caching it on first use
        /// </summary>
        /// <param name="highlightField">The field</param>
        /// <returns>A query parser</returns>
        protected QueryParser GetQueryParser(string highlightField)
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            if (!QueryParsers.TryGetValue(highlightField, out QueryParser? parser))
            {
                parser = new QueryParser(_luceneVersion, highlightField, HighlightAnalyzer);
                QueryParsers[highlightField] = parser;
            }

            return parser;
        }

        /// <summary>
        /// Builds the opening tag handed to the formatter, injecting a class attribute when a class name is given.
        /// </summary>
        private static string BuildOpeningTag(string preTag, string highlightCssClassName)
        {
            // Nothing to inject into an empty tag, and an empty class attribute is just noise.
            if (string.IsNullOrEmpty(preTag) || string.IsNullOrWhiteSpace(highlightCssClassName))
            {
                return preTag;
            }

            return $"{preTag.TrimEnd(">")} class=\"{highlightCssClassName}\">";
        }

        /// <summary>
        /// Creates a highlighter that scores fragments against the rewritten query and splits text into
        /// fragments of the requested size.
        /// </summary>
        private Highlighter CreateHighlighter(IndexSearcher searcher, Lucene.Net.Search.Query luceneQuery, int fragmentSize)
        {
            // The query is rewritten against the index reader before it is handed to the scorer.
            QueryScorer scorer = new(luceneQuery.Rewrite(searcher.IndexReader));

            return new Highlighter(HighlightFormatter, scorer)
            {
                TextFragmenter = new SimpleFragmenter(fragmentSize)
            };
        }

        /// <summary>
        /// Removes every case-insensitive occurrence of "content" from the text.
        /// </summary>
        private static string StripContentArtifact(string value)
        {
            // Original author's workaround: "weird bug in GetBestFragments always adds 'content'".
            // NOTE(review): this also removes legitimate occurrences of the word (and the prefix of
            // "contents"); confirm the workaround is still needed before relying on it.
            return Regex.Replace(value, "content", "", RegexOptions.IgnoreCase);
        }
    }
    

    and then used like this:

    /// <summary>
    /// Runs a search against the Umbraco external index and maps the hits to a
    /// <c>SearchResults</c> model with highlighted fields.
    /// </summary>
    /// <returns>The value produced by <c>GetSearchResults</c>, which may be null.</returns>
    /// <exception cref="InvalidOperationException">Thrown when the external index is not registered.</exception>
    public SearchResults? SearchPages()
    {
        if (!_examineManager.TryGetIndex(Umbraco.Cms.Core.Constants.UmbracoIndexes.ExternalIndexName, out IIndex index))
        {
            throw new InvalidOperationException($"No index found by name {Umbraco.Cms.Core.Constants.UmbracoIndexes.ExternalIndexName}");
        }
    
        var searcher = index.Searcher;
    
        // NOTE(review): searchQuery and searchFields are not used in the visible code; presumably they
        // feed the elided query below.
        var searchQuery = new SearchQuery(searcher);
        var searchFields = new string[]
        {
            SearchField.PageTitle,
            SearchField.MetaDescription,
            SearchField.Contents
        };
    
        var query = ... // Replace query
    
        // NOTE(review): `criteria` is not declared in this snippet; it must come from the enclosing class or be added as a parameter.
        var searchResults = query.Execute(new QueryOptions(criteria.Skip, criteria.Take));
    
        // https://our.umbraco.com/forum/using-umbraco-and-getting-started/90477-umbraco-examine-search-result-highlighting
    
        // The highlighter needs the underlying Lucene IndexSearcher; this direct cast throws if the
        // Examine searcher is not a BaseLuceneSearcher.
        var indexSearcher = ((BaseLuceneSearcher)searcher).GetSearchContext().GetSearcher().IndexSearcher;
    
        // Analyzer passed on to GetSearchResults along with the raw search term.
        var stdAnalyzer = new StandardAnalyzer(Lucene.Net.Util.LuceneVersion.LUCENE_48);
    
        var umbracoContext = _umbracoContextAccessor.GetRequiredUmbracoContext();
    
        return GetSearchResults(searchResults, umbracoContext, criteria.SearchTerm, indexSearcher, stdAnalyzer);
    }
    
    /// <summary>
    /// Converts raw Examine results into a <c>SearchResults</c> model, mapping each hit with
    /// <c>MapResultItem</c> and dropping hits that map to null.
    /// </summary>
    /// <param name="searchResults">The Examine result set; null yields a null return.</param>
    /// <param name="umbracoContext">Umbraco context passed through to the item mapper.</param>
    /// <param name="term">The search term; null or empty yields a null return.</param>
    /// <param name="searcher">Lucene index searcher passed through to the item mapper.</param>
    /// <param name="analyzer">Analyzer passed through to the item mapper.</param>
    /// <returns>The populated model, or null when there is no result set or no term.</returns>
    private static SearchResults? GetSearchResults(ISearchResults? searchResults, IUmbracoContext? umbracoContext, string? term, IndexSearcher searcher, Analyzer analyzer)
    {
        // Without a result set or a search term there is nothing to map.
        if (searchResults is null || string.IsNullOrEmpty(term))
        {
            return null;
        }

        return new SearchResults(searchResults.TotalItemCount)
        {
            Items = searchResults
                .Select(hit => MapResultItem(hit, umbracoContext, term, searcher, analyzer))
                .WhereNotNull()
                ?.ToList(),
        };
    }
    
    /// <summary>
    /// Maps a single Examine hit to a <c>SearchResultItem</c>, replacing the heading and description
    /// with their highlighted versions when the highlighter produces any output.
    /// </summary>
    /// <param name="result">The Examine search hit.</param>
    /// <param name="umbracoContext">Umbraco context used to resolve the published content by id.</param>
    /// <param name="term">The search term, parsed as a Lucene query for each highlighted field.</param>
    /// <param name="searcher">Lucene index searcher handed to the highlighter.</param>
    /// <param name="analyzer">Analyzer used when parsing the term.</param>
    /// <returns>The mapped item, or null when the hit id is not an integer or the content cannot be found.</returns>
    private static SearchResultItem? MapResultItem(ISearchResult result, IUmbracoContext? umbracoContext, string term, IndexSearcher searcher, Analyzer analyzer)
    {
        if (!int.TryParse(result.Id, out int nodeId))
        {
            return null;
        }

        var content = umbracoContext?.Content?.GetById(nodeId);
        if (content is null)
        {
            return null;
        }

        string heading = HighlightField(SearchField.Heading);
        string description = HighlightField(SearchField.Description);

        return new SearchResultItem(content)
        {
            Heading = heading,
            Description = StringUtilities.Truncate(description, 400, true, false)?.ToString(),
            Url = content.Url()
        };

        // Returns the highlighted value of a field, the plain stored value when nothing was highlighted,
        // or an empty string when the hit does not carry the field at all.
        string HighlightField(string fieldName)
        {
            // TryGetValue instead of the indexer: a hit without the field must not throw KeyNotFoundException.
            if (!result.Values.TryGetValue(fieldName, out string? stored) || string.IsNullOrEmpty(stored))
            {
                return string.Empty;
            }

            var fieldQuery = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, fieldName, analyzer).Parse(term);

            string highlighted = new LuceneHighlighter(searcher, fieldQuery, "highlight", "<mark>", "</mark>")
                .GetHighlight(stored, fieldName);

            return string.IsNullOrEmpty(highlighted) ? stored : highlighted;
        }
    }
    

    It wraps the highlight in a <mark> element that can be styled. Of course, it can also use a <span> or set a specific CSS class.

Please Sign in or register to post replies

Write your reply to:

Draft