But there are inconsistencies between the latest Examine version and Lucene.Net.Contrib. The latest Examine version uses the latest Lucene version, 4.8.0-beta00016, but Lucene.Net.Contrib uses Lucene version 3.0.3, so if I try to call GetBestFragments I get the following exception:
'Could not load type 'Lucene.Net.Analysis.Tokenattributes.ITermAttribute' from assembly 'Lucene.Net, Version=4.0.0.0, Culture=neutral, PublicKeyToken=85089178b9ac3181'.'
I've checked the Lucene DLL and the required attribute type is not there:
My code:
/// <summary>
/// Builds an HTML-highlighted excerpt of a piece of text for the given search query.
/// </summary>
/// <param name="IndexField">
/// The text to highlight. Despite its name, this value is what gets tokenized and
/// cut into fragments; it is not used as a field name.
/// </param>
/// <param name="reader">The index reader handed to <c>FragmentScorer</c>.</param>
/// <param name="searchQuery">The query string handed to <c>FragmentScorer</c>.</param>
/// <param name="highlightField">The field name passed to the analyzer and to <c>FragmentScorer</c>.</param>
/// <returns>
/// Up to three best fragments joined by "..." and followed by a trailing "...",
/// or an empty string when the text is null/empty or no fragment was produced.
/// </returns>
public static string GetHighlight(string IndexField, IndexReader reader, string searchQuery, string highlightField)
{
    // Nothing to tokenize; this also avoids the ArgumentNullException that
    // StringReader throws for a null string.
    if (string.IsNullOrEmpty(IndexField))
    {
        return string.Empty;
    }

    var formatter = new SimpleHTMLFormatter("<span class=\"umbSearchHighlight\">", "</span>");
    var highlighter = new Highlighter(formatter, FragmentScorer(searchQuery, highlightField, reader));

    // Analyzer is IDisposable in Lucene.NET 4.8, so release it once the fragments are built.
    using (var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion))
    {
        var tokenStream = analyzer.GetTokenStream(highlightField, new StringReader(IndexField));
        string fragments = highlighter.GetBestFragments(tokenStream, IndexField, 3, "...");

        return fragments.Length > 0 ? fragments + "..." : string.Empty;
    }
}
Search function:
// Only highlight when the external index exists AND its searcher is Lucene-based.
// TryGetIndex reports a missing index through its return value, so it must be
// checked before the out variable is dereferenced.
if (_examineManager.TryGetIndex(UmbracoConstants.UmbracoIndexes.ExternalIndexName, out IIndex externalIndex)
    && externalIndex.Searcher is LuceneSearcher luceneSearcher)
{
    var highlight = ExamineHelper.GetHighlight(
        "nodeName",
        luceneSearcher.GetSearchContext().GetSearcher().IndexSearcher.IndexReader,
        "nodeName:term",
        "nodeName");
}
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers.Classic;
using Lucene.Net.Search;
using Lucene.Net.Search.Highlight;
using System.Text.RegularExpressions;
using Umbraco.Extensions;
namespace My.Project;
using Version = Lucene.Net.Util.LuceneVersion;
/// <summary>
/// Produces highlighted excerpts of field values for a Lucene query, wrapping
/// the matched terms in a configurable HTML tag.
/// </summary>
public class LuceneHighlighter
{
    /// <summary>
    /// Matches the literal text "content" case-insensitively, anywhere in the input.
    /// </summary>
    private static readonly Regex ContentWord = new("content", RegexOptions.IgnoreCase);

    private readonly Version _luceneVersion = Version.LUCENE_48;

    /// <summary>
    /// Cache of query parsers keyed by field name; starts empty and is filled by
    /// <see cref="GetQueryParser"/>.
    /// </summary>
    protected Dictionary<string, QueryParser> QueryParsers = new();

    /// <summary>
    /// Get or set the separator string (default = "...")
    /// </summary>
    public string Separator { get; set; }

    /// <summary>
    /// Get or set the maximum number of highlights to show (default = 5)
    /// </summary>
    public int MaxNumHighlights { get; set; }

    /// <summary>
    /// Get or set the Formatter to use (default = SimpleHTMLFormatter)
    /// </summary>
    public IFormatter HighlightFormatter { get; set; }

    /// <summary>
    /// Get or set the Analyzer to use (default = StandardAnalyzer)
    /// </summary>
    public Analyzer HighlightAnalyzer { get; set; }

    /// <summary>
    /// Get the index searcher being used
    /// </summary>
    public IndexSearcher Searcher { get; private set; }

    /// <summary>
    /// Get the Query to be used for highlighting
    /// </summary>
    public Lucene.Net.Search.Query LuceneQuery { get; private set; }

    /// <summary>
    /// Initialise a new LuceneHighlighter instance
    /// </summary>
    /// <param name="searcher">The IndexSearcher being used</param>
    /// <param name="luceneQuery">The underlying Lucene Query being used</param>
    /// <param name="highlightCssClassName">The name of the CSS class added to the opening tag wrapped around highlighted words</param>
    /// <param name="preTag">The opening tag, e.g. <c>&lt;mark&gt;</c>; its trailing '&gt;' is replaced by the class attribute</param>
    /// <param name="postTag">The closing tag, e.g. <c>&lt;/mark&gt;</c></param>
    /// <exception cref="System.ArgumentException">Thrown when <paramref name="preTag"/> is null or empty</exception>
    public LuceneHighlighter(IndexSearcher searcher, Lucene.Net.Search.Query luceneQuery, string highlightCssClassName, string preTag, string postTag)
    {
        // An empty opening tag cannot carry a class attribute; fail with a clear message.
        if (string.IsNullOrEmpty(preTag))
        {
            throw new System.ArgumentException("The opening tag must not be null or empty.", nameof(preTag));
        }

        Searcher = searcher;
        LuceneQuery = luceneQuery;
        Separator = "...";
        MaxNumHighlights = 5;
        HighlightAnalyzer = new StandardAnalyzer(_luceneVersion);

        // "<mark>" + "highlight" becomes "<mark class=\"highlight\">".
        string openingTag = $"{preTag.TrimEnd('>')} class=\"{highlightCssClassName}\">";
        HighlightFormatter = new SimpleHTMLFormatter(openingTag, postTag);
    }

    /// <summary>
    /// Get the highlighted string for a value and a field, using this instance's
    /// <see cref="Searcher"/> and <see cref="LuceneQuery"/>
    /// </summary>
    /// <param name="value">The field value</param>
    /// <param name="highlightField">The field name</param>
    /// <param name="fragmentSize">Size in number of characters of each fragment</param>
    /// <returns>A string containing the highlighted result; empty when <paramref name="value"/> is null or empty</returns>
    public string GetHighlight(string value, string highlightField, int fragmentSize = 100)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        return GetHighlight(StripContentWord(value), Searcher, highlightField, LuceneQuery, fragmentSize);
    }

    /// <summary>
    /// Get the highlighted strings for a value and a field
    /// </summary>
    /// <param name="value">The field value</param>
    /// <param name="highlightField">The field name</param>
    /// <param name="fragmentSize">Size in number of characters of each fragment</param>
    /// <returns>A string array containing the highlighted results; empty when <paramref name="value"/> is null or empty</returns>
    public string[] GetHighlights(string value, string highlightField, int fragmentSize = 100)
    {
        if (string.IsNullOrEmpty(value))
        {
            return System.Array.Empty<string>();
        }

        string text = StripContentWord(value);
        Highlighter highlighter = CreateHighlighter(LuceneQuery, Searcher, fragmentSize);
        TokenStream tokenStream = HighlightAnalyzer.GetTokenStream(highlightField, new StringReader(text));
        return highlighter.GetBestFragments(tokenStream, text, MaxNumHighlights);
    }

    /// <summary>
    /// Get the highlighted string for a value and field, using an explicitly
    /// supplied searcher and query. Unlike the other overloads, the value is
    /// highlighted as-is.
    /// </summary>
    /// <param name="value">The field value</param>
    /// <param name="searcher">The index searcher whose reader is used to rewrite the query</param>
    /// <param name="highlightField">The highlight field</param>
    /// <param name="luceneQuery">The query being used</param>
    /// <param name="fragmentSize">Size in number of characters of each fragment</param>
    /// <returns>A string containing the highlighted result; empty when <paramref name="value"/> is null or empty</returns>
    public string GetHighlight(string value, IndexSearcher searcher, string highlightField, Lucene.Net.Search.Query luceneQuery, int fragmentSize = 100)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        Highlighter highlighter = CreateHighlighter(luceneQuery, searcher, fragmentSize);
        TokenStream tokenStream = HighlightAnalyzer.GetTokenStream(highlightField, new StringReader(value));
        return highlighter.GetBestFragments(tokenStream, value, MaxNumHighlights, Separator);
    }

    /// <summary>
    /// Gets a query parser for a highlight field, creating and caching it on first use
    /// </summary>
    /// <param name="highlightField">The field</param>
    /// <returns>A query parser</returns>
    protected QueryParser GetQueryParser(string highlightField)
    {
        // Single lookup instead of ContainsKey followed by the indexer.
        if (!QueryParsers.TryGetValue(highlightField, out var parser))
        {
            parser = new QueryParser(_luceneVersion, highlightField, HighlightAnalyzer);
            QueryParsers[highlightField] = parser;
        }

        return parser;
    }

    /// <summary>
    /// Builds a highlighter whose scorer is the query rewritten against the searcher's reader
    /// </summary>
    private Highlighter CreateHighlighter(Lucene.Net.Search.Query query, IndexSearcher searcher, int fragmentSize)
    {
        QueryScorer scorer = new(query.Rewrite(searcher.IndexReader));
        return new Highlighter(HighlightFormatter, scorer)
        {
            TextFragmenter = new SimpleFragmenter(fragmentSize)
        };
    }

    /// <summary>
    /// Removes every occurrence of "content" (any casing) from the text
    /// </summary>
    /// <remarks>
    /// Kept from the original author's workaround ("weird bug in GetBestFragments always adds
    /// 'content'"). NOTE(review): the pattern has no word boundaries, so it also strips the
    /// letters from longer words and from legitimate text; confirm the workaround is still needed.
    /// </remarks>
    private static string StripContentWord(string text)
    {
        return ContentWord.Replace(text, string.Empty);
    }
}
and then used like this:
/// <summary>
/// Runs a search against the Umbraco external index and hands the raw results,
/// together with the underlying Lucene searcher and an analyzer, to
/// <c>GetSearchResults</c> for mapping.
/// </summary>
/// <returns>Whatever <c>GetSearchResults</c> returns for the executed query.</returns>
/// <exception cref="InvalidOperationException">Thrown when the external index cannot be resolved.</exception>
// NOTE(review): `criteria` is used below but is not declared anywhere in this snippet;
// presumably it is a parameter or field that was trimmed from the post.
public SearchResults? SearchPages()
{
// Fail fast when the external index is not registered.
if (!_examineManager.TryGetIndex(Umbraco.Cms.Core.Constants.UmbracoIndexes.ExternalIndexName, out IIndex index))
{
throw new InvalidOperationException($"No index found by name {Umbraco.Cms.Core.Constants.UmbracoIndexes.ExternalIndexName}");
}
var searcher = index.Searcher;
var searchQuery = new SearchQuery(searcher);
var searchFields = new string[]
{
SearchField.PageTitle,
SearchField.MetaDescription,
SearchField.Contents
};
// Placeholder left by the author: the actual query construction is elided here.
var query = ... // Replace query
var searchResults = query.Execute(new QueryOptions(criteria.Skip, criteria.Take));
// https://our.umbraco.com/forum/using-umbraco-and-getting-started/90477-umbraco-examine-search-result-highlighting
// Direct cast: throws InvalidCastException if the index's searcher is not a BaseLuceneSearcher.
var indexSearcher = ((BaseLuceneSearcher)searcher).GetSearchContext().GetSearcher().IndexSearcher;
// NOTE(review): this analyzer is never disposed in this method; confirm whether it should be.
var stdAnalyzer = new StandardAnalyzer(Lucene.Net.Util.LuceneVersion.LUCENE_48);
var umbracoContext = _umbracoContextAccessor.GetRequiredUmbracoContext();
return GetSearchResults(searchResults, umbracoContext, criteria.SearchTerm, indexSearcher, stdAnalyzer);
}
/// <summary>
/// Maps raw Examine results to a <c>SearchResults</c> model, dropping every
/// result that <c>MapResultItem</c> could not map.
/// </summary>
/// <param name="searchResults">The raw results; may be null.</param>
/// <param name="umbracoContext">The Umbraco context forwarded to <c>MapResultItem</c>.</param>
/// <param name="term">The search term forwarded to <c>MapResultItem</c>; may be null.</param>
/// <param name="searcher">The Lucene searcher forwarded to <c>MapResultItem</c>.</param>
/// <param name="analyzer">The analyzer forwarded to <c>MapResultItem</c>.</param>
/// <returns>
/// The mapped results, carrying the unfiltered <c>TotalItemCount</c> of the raw results,
/// or null when <paramref name="searchResults"/> is null or <paramref name="term"/> is null/empty.
/// </returns>
private static SearchResults? GetSearchResults(ISearchResults? searchResults, IUmbracoContext? umbracoContext, string? term, IndexSearcher searcher, Analyzer analyzer)
{
    if (searchResults is null || string.IsNullOrEmpty(term))
    {
        return null;
    }

    // WhereNotNull() yields a sequence, so no null-conditional is needed before ToList().
    var items = searchResults
        .Select(x => MapResultItem(x, umbracoContext, term, searcher, analyzer))
        .WhereNotNull()
        .ToList();

    return new SearchResults(searchResults.TotalItemCount)
    {
        Items = items,
    };
}
/// <summary>
/// Maps a single Examine result to a <c>SearchResultItem</c>, replacing the heading
/// and description with their highlighted versions when highlighting produced output.
/// </summary>
/// <param name="result">The raw search result.</param>
/// <param name="umbracoContext">The Umbraco context used to look up the published content.</param>
/// <param name="term">The search term, parsed as a Lucene query for highlighting.</param>
/// <param name="searcher">The Lucene searcher passed to <c>LuceneHighlighter</c>.</param>
/// <param name="analyzer">The analyzer used by the query parser.</param>
/// <returns>
/// The mapped item, or null when the result id is not an integer or no published
/// content exists for it.
/// </returns>
private static SearchResultItem? MapResultItem(ISearchResult result, IUmbracoContext? umbracoContext, string term, IndexSearcher searcher, Analyzer analyzer)
{
    if (!int.TryParse(result.Id, out int nodeId))
    {
        return null;
    }

    var content = umbracoContext?.Content?.GetById(nodeId);
    if (content == null)
    {
        return null;
    }

    string heading = HighlightOrOriginal(SearchField.Heading);
    string description = HighlightOrOriginal(SearchField.Description);

    return new SearchResultItem(content)
    {
        Heading = heading,
        Description = StringUtilities.Truncate(description, 400, true, false)?.ToString(),
        Url = content.Url()
    };

    // Returns the highlighted value of a field, or the stored value when nothing was highlighted.
    string HighlightOrOriginal(string field)
    {
        // The dictionary indexer throws for a field that is missing from the result,
        // so look the value up defensively and treat missing/null as empty.
        string text = result.Values.TryGetValue(field, out var stored) && stored != null
            ? stored
            : string.Empty;

        if (text.Length == 0)
        {
            return text;
        }

        // NOTE(review): QueryParser.Parse throws on malformed query syntax; confirm that
        // the term is sanitized upstream.
        var query = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, field, analyzer).Parse(term);
        string highlighted = new LuceneHighlighter(searcher, query, "highlight", "<mark>", "</mark>")
            .GetHighlight(text, field);

        return string.IsNullOrEmpty(highlighted) ? text : highlighted;
    }
}
It wraps the highlight in a <mark> element that can be styled. Of course, it can also use a <span> or set a specific CSS class.
Umbraco Examine Highlight result
I'm looking for a solution to Highlight the examine result. I've already checked some solutions:
https://our.umbraco.com/forum/developers/api-questions/44447-Highlight-text-in-examine-search-results-when-matching-beginning-of-words
https://our.umbraco.com/forum/developers/extending-umbraco/19329-Search-multiple-fields-for-multiple-terms-with-examine?p=0
https://our.umbraco.com/forum/using-umbraco-and-getting-started/90477-umbraco-examine-search-result-highlighting
https://our.umbraco.com/forum/developers/extending-umbraco/13571-Umbraco-Examine-Search-Results-Highlighting
But there are inconsistencies between the latest Examine version and Lucene.Net.Contrib. The latest Examine version uses the latest Lucene version, 4.8.0-beta00016, but Lucene.Net.Contrib uses Lucene version 3.0.3, so if I try to call GetBestFragments I get the following exception: 'Could not load type 'Lucene.Net.Analysis.Tokenattributes.ITermAttribute' from assembly 'Lucene.Net, Version=4.0.0.0, Culture=neutral, PublicKeyToken=85089178b9ac3181'.'
I've checked the Lucene DLL and the required attribute type is not there:
My code:
Search function:
Hi Stefan,
Did you ever find a solution for Examine highlighting?
Jeroen
Hi Jeroen
Unfortunately, I haven't found the solution so I fetched all the data and then did some Regex search to highlight the response.
I have used this in some projects:
and then used like this:
It wraps the highlight in a <mark> element that can be styled. Of course, it can also use a <span> or set a specific CSS class.
is working on a reply...