// Builds a fuzzy, multi-field query against the Umbraco "External" index and extracts
// highlighted fragments of the "description" field for every hit.
// `fields`, `searchTerm` and `fuzzyScore` are expected to be defined by the surrounding code.
var stdAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
var formatter = new SimpleHTMLFormatter();
var finalQuery = new BooleanQuery();
var tmpQuery = new BooleanQuery();
var multiQueryParser = new MultiFieldQueryParser(Version.LUCENE_29, fields, stdAnalyzer);
var externalIndexSet = Examine.LuceneEngine.Config.IndexSets.Instance.Sets["ExternalIndexSet"];
var externalSearcher = new IndexSearcher($"{externalIndexSet.IndexDirectory.FullName}\\Index", true);
try
{
    var terms = searchTerm.RemoveStopWords().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (var term in terms)
    {
        // Strip any user-supplied '~' so that exactly one fuzzy operator is appended per term.
        tmpQuery.Add(multiQueryParser.Parse(term.Replace("~", "") + $@"~{fuzzyScore}"),
            BooleanClause.Occur.SHOULD);
    }
    tmpQuery.Add(multiQueryParser.Parse("noIndex:1"), BooleanClause.Occur.MUST_NOT);
    finalQuery.Add(multiQueryParser.Parse($@"{tmpQuery}"),
        BooleanClause.Occur.MUST);
    finalQuery.Add(multiQueryParser.Parse("__IndexType:content"), BooleanClause.Occur.MUST);
    var hits = externalSearcher.Search(finalQuery, 100);

    // The highlighter must score against the REWRITTEN query: fuzzy queries have to be
    // rewritten against the index reader before QueryScorer can match their terms.
    var qs = new QueryScorer(finalQuery.Rewrite(externalSearcher.GetIndexReader()));
    var highlighter = new Highlighter(formatter, qs);
    var fragmenter = new SimpleFragmenter();
    highlighter.SetTextFragmenter(fragmenter);
    highlighter.SetMaxDocBytesToAnalyze(int.MaxValue);
    foreach (var item in hits.ScoreDocs)
    {
        var document = externalSearcher.Doc(item.doc);
        var description = document.Get("description");
        if (string.IsNullOrEmpty(description))
        {
            // Nothing stored for this document, so there is nothing to highlight.
            continue;
        }
        // Re-analyze the stored text instead of calling TokenSources.GetTokenStream:
        // the index stores term vectors without positions/offsets, which makes
        // TokenSources throw an ArgumentException.
        var tokenStream = stdAnalyzer.TokenStream("description", new System.IO.StringReader(description));
        var frags = highlighter.GetBestFragments(tokenStream, description, 10);
    }
}
finally
{
    // Always release the index files, even when parsing or searching fails.
    externalSearcher.Dispose();
}
Everything seems to be working fine, except that I can't get a token stream no matter how many different methods from different classes I try, and therefore no frags are returned. I then looked at the Lucene.Net source code at https://lucenenet.apache.org/docs/3.0.3/df/d43/tokensources8cssource.html and found that the method GetTokenStream will throw an ArgumentException (see image below) if the "description" field I use above is not a TermPositionVector. I got exactly this exception when I debugged it. How do I fix this issue?
I use default ExternalSearcher & ExternalIndexSet provided by Umbraco (7.7.6) to index & query content within BackOffice.
I used Lucene Luke to examine the index Umbraco created and found that the description field has the Term Vector option ticked, but not positions or offsets (see image below). That means Umbraco Examine only knows the number of occurrences, not the positions and offsets, which are required to get the token stream I mentioned in the initial post. Reference: http://makble.com/what-is-term-vector-in-lucene
Can anyone shed some light on how to fix this? Thanks.
First, add a reference to the NuGet package Lucene.Net.Contrib 2.9.4.1 (ensure it's the 2.9.4.1 version and not latest).
Then I have the following class with various methods to generate highlighting:
/// <summary>
/// Generates highlighted text fragments for Lucene search results by scoring a field's
/// text against the search query, rewritten against the searcher's index reader.
/// </summary>
public class LuceneHighlighter
{
    private readonly Lucene.Net.Util.Version _luceneVersion = Lucene.Net.Util.Version.LUCENE_29;

    /// <summary>
    /// Cache of query parsers keyed by field name; starts empty and is filled on demand
    /// by <see cref="GetQueryParser"/>.
    /// </summary>
    protected Dictionary<string, QueryParser> QueryParsers = new Dictionary<string, QueryParser>();

    /// <summary>
    /// Get or set the separator string (default = "...")
    /// </summary>
    public string Separator { get; set; }

    /// <summary>
    /// Get or set the maximum number of highlights to show (default = 5)
    /// </summary>
    public int MaxNumHighlights { get; set; }

    /// <summary>
    /// Get or set the Formatter to use (default = SimpleHTMLFormatter wrapping matches in a span)
    /// </summary>
    public Formatter HighlightFormatter { get; set; }

    /// <summary>
    /// Get or set the Analyzer to use (default = StandardAnalyzer)
    /// </summary>
    public Analyzer HighlightAnalyzer { get; set; }

    /// <summary>
    /// Get the index searcher being used
    /// </summary>
    public IndexSearcher Searcher { get; private set; }

    /// <summary>
    /// Get the Query to be used for highlighting
    /// </summary>
    public Query LuceneQuery { get; private set; }

    /// <summary>
    /// Initialise a new LuceneHighlighter instance
    /// </summary>
    /// <param name="searcher">The IndexSearcher being used</param>
    /// <param name="luceneQuery">The underlying Lucene Query being used</param>
    /// <param name="highlightCssClassName">The name of the CSS class used to wrap around highlighted words</param>
    public LuceneHighlighter(IndexSearcher searcher, Query luceneQuery, string highlightCssClassName)
    {
        this.Searcher = searcher;
        this.LuceneQuery = luceneQuery;
        this.Separator = "...";
        this.MaxNumHighlights = 5;
        this.HighlightAnalyzer = new StandardAnalyzer(_luceneVersion);
        this.HighlightFormatter = new SimpleHTMLFormatter("<span class=\"" + highlightCssClassName + "\">", "</span> ");
    }

    /// <summary>
    /// Get the highlighted string for a value and a field, using the searcher and query
    /// supplied to the constructor.
    /// </summary>
    /// <param name="value">The field value</param>
    /// <param name="highlightField">The field name</param>
    /// <returns>A string containing the highlighted result; empty when <paramref name="value"/> is null or empty</returns>
    public string GetHighlight(string value, string highlightField)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        // Workaround kept from the original author: the word "content" is removed from the
        // text because GetBestFragments was observed to always add it.
        value = Regex.Replace(value, "content", "", RegexOptions.IgnoreCase);
        return GetHighlight(value, Searcher, highlightField, LuceneQuery);
    }

    /// <summary>
    /// Get the highlighted string for a value and field using an explicit searcher and query
    /// </summary>
    /// <param name="value">The field value</param>
    /// <param name="searcher">The index searcher whose reader is used to rewrite the query</param>
    /// <param name="highlightField">The highlight field</param>
    /// <param name="luceneQuery">The query being used</param>
    /// <returns>A string containing the highlighted result; empty when <paramref name="value"/> is null or empty</returns>
    public string GetHighlight(string value, IndexSearcher searcher, string highlightField, Query luceneQuery)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        // The query is rewritten against the index reader before scoring; per this thread,
        // omitting the rewrite was the reason no fragments were returned for fuzzy queries.
        var scorer = new QueryScorer(luceneQuery.Rewrite(searcher.GetIndexReader()));
        var highlighter = new Highlighter(HighlightFormatter, scorer);
        var tokenStream = HighlightAnalyzer.TokenStream(highlightField, new StringReader(value));
        return highlighter.GetBestFragments(tokenStream, value, MaxNumHighlights, Separator);
    }

    /// <summary>
    /// Gets (creating and caching on first use) a query parser for a highlight field
    /// </summary>
    /// <param name="highlightField">The field</param>
    /// <returns>A query parser</returns>
    protected QueryParser GetQueryParser(string highlightField)
    {
        QueryParser parser;
        if (!QueryParsers.TryGetValue(highlightField, out parser))
        {
            parser = new QueryParser(_luceneVersion, highlightField, HighlightAnalyzer);
            QueryParsers[highlightField] = parser;
        }
        return parser;
    }
}
By the way, I found that my code looked pretty much the same as yours except for the parameter passed to the QueryScorer. Mine did not call the .Rewrite method, which turned out to be the root of the issue. Again, thank you.
Hi Thanh, would you mind sharing the steps you took to get the highlighting working? I would really appreciate it. I have the required packages but have not been able to get it working.
I've uploaded complete code here and hope it can help anyone who wants to implement the search highlight.
1. Create LuceneHighlighter class
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Highlight;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
/// <summary>
/// Generates highlighted text fragments for Lucene search results by scoring a field's
/// text against the search query, rewritten against the searcher's index reader.
/// </summary>
public class LuceneHighlighter
{
    private readonly Lucene.Net.Util.Version _luceneVersion = Lucene.Net.Util.Version.LUCENE_29;

    /// <summary>
    /// Cache of query parsers keyed by field name; starts empty and is filled on demand
    /// by <see cref="GetQueryParser"/>.
    /// </summary>
    protected Dictionary<string, QueryParser> QueryParsers = new Dictionary<string, QueryParser>();

    /// <summary>
    /// Get or set the separator string (default = "...")
    /// </summary>
    public string Separator { get; set; }

    /// <summary>
    /// Get or set the maximum number of highlights to show (default = 5)
    /// </summary>
    public int MaxNumHighlights { get; set; }

    /// <summary>
    /// Get or set the Formatter to use (default = SimpleHTMLFormatter)
    /// </summary>
    public Formatter HighlightFormatter { get; set; }

    /// <summary>
    /// Get or set the Analyzer to use (default = StandardAnalyzer)
    /// </summary>
    public Analyzer HighlightAnalyzer { get; set; }

    /// <summary>
    /// Get the index searcher being used
    /// </summary>
    public IndexSearcher Searcher { get; private set; }

    /// <summary>
    /// Get the Query to be used for highlighting
    /// </summary>
    public Query LuceneQuery { get; private set; }

    /// <summary>
    /// Initialise a new LuceneHighlighter instance
    /// </summary>
    /// <param name="searcher">The IndexSearcher being used</param>
    /// <param name="luceneQuery">The underlying Lucene Query being used</param>
    /// <param name="highlightCssClassName">
    /// The name of the CSS class intended to wrap highlighted words.
    /// NOTE(review): this version ignores the value and uses the default SimpleHTMLFormatter
    /// markup; assign <see cref="HighlightFormatter"/> to customise the output.
    /// </param>
    public LuceneHighlighter(IndexSearcher searcher, Query luceneQuery, string highlightCssClassName)
    {
        this.Searcher = searcher;
        this.LuceneQuery = luceneQuery;
        this.Separator = "...";
        this.MaxNumHighlights = 5;
        this.HighlightAnalyzer = new StandardAnalyzer(_luceneVersion);
        this.HighlightFormatter = new SimpleHTMLFormatter();
    }

    /// <summary>
    /// Get the highlighted string for a value and a field, using the searcher and query
    /// supplied to the constructor. Fragments are joined with <see cref="Separator"/>.
    /// </summary>
    /// <param name="value">The field value</param>
    /// <param name="highlightField">The field name</param>
    /// <returns>A string containing the highlighted result; empty when <paramref name="value"/> is null or empty</returns>
    public string GetHighlight(string value, string highlightField)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        return GetHighlight(StripContentWord(value), Searcher, highlightField, LuceneQuery);
    }

    /// <summary>
    /// Get the individual highlighted fragments for a value and a field, using the searcher
    /// and query supplied to the constructor.
    /// </summary>
    /// <param name="value">The field value</param>
    /// <param name="highlightField">The field name</param>
    /// <returns>Up to <see cref="MaxNumHighlights"/> fragments; an empty array when <paramref name="value"/> is null or empty</returns>
    public string[] GetHighlights(string value, string highlightField)
    {
        if (string.IsNullOrEmpty(value))
        {
            return new string[0];
        }

        value = StripContentWord(value);
        var highlighter = CreateHighlighter(Searcher, LuceneQuery);
        var tokenStream = HighlightAnalyzer.TokenStream(highlightField, new StringReader(value));
        return highlighter.GetBestFragments(tokenStream, value, MaxNumHighlights);
    }

    /// <summary>
    /// Get the highlighted string for a value and field using an explicit searcher and query
    /// </summary>
    /// <param name="value">The field value</param>
    /// <param name="searcher">The index searcher whose reader is used to rewrite the query</param>
    /// <param name="highlightField">The highlight field</param>
    /// <param name="luceneQuery">The query being used</param>
    /// <returns>A string containing the highlighted result; empty when <paramref name="value"/> is null or empty</returns>
    public string GetHighlight(string value, IndexSearcher searcher, string highlightField, Query luceneQuery)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        var highlighter = CreateHighlighter(searcher, luceneQuery);
        var tokenStream = HighlightAnalyzer.TokenStream(highlightField, new StringReader(value));
        return highlighter.GetBestFragments(tokenStream, value, MaxNumHighlights, Separator);
    }

    /// <summary>
    /// Gets (creating and caching on first use) a query parser for a highlight field
    /// </summary>
    /// <param name="highlightField">The field</param>
    /// <returns>A query parser</returns>
    protected QueryParser GetQueryParser(string highlightField)
    {
        QueryParser parser;
        if (!QueryParsers.TryGetValue(highlightField, out parser))
        {
            parser = new QueryParser(_luceneVersion, highlightField, HighlightAnalyzer);
            QueryParsers[highlightField] = parser;
        }
        return parser;
    }

    // Builds a highlighter whose scorer uses the query rewritten against the index reader;
    // per this thread, omitting the rewrite was why fuzzy queries produced no fragments.
    private Highlighter CreateHighlighter(IndexSearcher searcher, Query luceneQuery)
    {
        var scorer = new QueryScorer(luceneQuery.Rewrite(searcher.GetIndexReader()));
        return new Highlighter(HighlightFormatter, scorer);
    }

    // Workaround kept from the original author: the word "content" is removed from the text
    // because GetBestFragments was observed to always add it.
    private static string StripContentWord(string value)
    {
        return Regex.Replace(value, "content", "", RegexOptions.IgnoreCase);
    }
}
2. Create IExamineContentSearchService interface
using System.Collections.Generic;
using subaru.com.au.web.ViewModels.Search.Content;
using Umbraco.Web;
/// <summary>
/// Searches an Examine/Lucene index and returns content results with highlighted excerpts,
/// plus prefix-based auto-complete suggestions.
/// </summary>
public interface IExamineContentSearchService
{
    /// <summary>
    /// Runs a search and returns one page of highlighted results.
    /// </summary>
    /// <param name="searchProvider">Index set name prefix (the implementation appends "IndexSet")</param>
    /// <param name="searchTerm">The text entered by the user</param>
    /// <param name="umbracoHelper">Helper used to resolve hits to published content</param>
    /// <param name="highlightField">The index field to search and highlight</param>
    /// <param name="maxDocs">Maximum number of results to return</param>
    /// <param name="offSet">Index of the first hit to return</param>
    /// <returns>The page of results together with the total hit count</returns>
    HighlightContentSearchResultsViewModel Search(
        string searchProvider,
        string searchTerm,
        UmbracoHelper umbracoHelper,
        string highlightField,
        int maxDocs,
        int offSet);

    /// <summary>
    /// Returns suggestions for documents whose <paramref name="field"/> starts with <paramref name="prefix"/>.
    /// </summary>
    /// <param name="searchProvider">Index set name prefix (the implementation appends "IndexSet")</param>
    /// <param name="prefix">The text typed so far</param>
    /// <param name="field">The index field to run the prefix query against</param>
    /// <param name="maxSuggestions">Maximum number of documents to inspect</param>
    /// <returns>The list of suggestions</returns>
    IList<string> AutoComplete(
        string searchProvider,
        string prefix,
        string field,
        int maxSuggestions);
}
3. Create ExamineContentSearchService class that implements the IExamineContentSearchService interface
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Examine.LuceneEngine.Config;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using subaru.com.au.web.ViewModels.Search.Content;
using Umbraco.Core.Logging;
using Umbraco.Web;
using Version = Lucene.Net.Util.Version;
/// <summary>
/// Lucene-backed implementation of <see cref="IExamineContentSearchService"/> that reads the
/// Examine index directly from disk to produce highlighted results and auto-complete suggestions.
/// </summary>
class ExamineContentSearchService : IExamineContentSearchService
{
    private readonly Logger _logger;

    // Upper bound on the number of hits Lucene returns; paging cannot go past this.
    private const int MaxDocsToBeSearched = 1000;

    public ExamineContentSearchService()
    {
        _logger = Logger.CreateWithDefaultLog4NetConfiguration();
    }

    /// <summary>
    /// Searches <paramref name="highlightField"/> for <paramref name="searchTerm"/> and returns one
    /// page of results with highlighted excerpts.
    /// </summary>
    /// <param name="searchProvider">Index set name prefix; "IndexSet" is appended</param>
    /// <param name="searchTerm">The text entered by the user</param>
    /// <param name="umbracoHelper">Helper used to resolve each hit's "id" field to published content</param>
    /// <param name="highlightField">The index field to search and highlight</param>
    /// <param name="maxDocs">Maximum number of results to return</param>
    /// <param name="offSet">Index of the first hit to return; negative values are treated as 0</param>
    /// <returns>The page of results and the total hit count; an empty result for a blank search term</returns>
    public HighlightContentSearchResultsViewModel Search(string searchProvider, string searchTerm,
        UmbracoHelper umbracoHelper, string highlightField, int maxDocs, int offSet)
    {
        var highLightContentList = new List<HighlightContentViewModel>();
        if (string.IsNullOrWhiteSpace(searchTerm))
        {
            // Nothing to search for; avoid handing an empty string to the query parser.
            return new HighlightContentSearchResultsViewModel
            {
                Count = 0,
                SearchTerm = searchTerm,
                Results = highLightContentList
            };
        }

        var stdAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
        var searchIndexSet = IndexSets.Instance.Sets[$"{searchProvider}IndexSet"];
        var dirInfo = new DirectoryInfo($"{searchIndexSet.IndexDirectory.FullName}\\Index");
        var directory = FSDirectory.Open(dirInfo);
        var searcher = new IndexSearcher(directory, true);
        try
        {
            var queryParser = new QueryParser(Version.LUCENE_29, highlightField, stdAnalyzer);
            var query = queryParser.Parse(AddFuzziness(searchTerm));
            var hits = searcher.Search(query, MaxDocsToBeSearched);

            // One highlighter serves every hit: it depends only on the searcher and the query.
            var luceneHighlighter = new LuceneHighlighter(searcher, query, "highlight");

            // Bound the loop by the documents actually returned, not by TotalHits:
            // TotalHits may exceed MaxDocsToBeSearched while ScoreDocs never does.
            var available = hits.ScoreDocs.Length;
            for (var i = Math.Max(0, offSet); i < offSet + maxDocs && i < available; i++)
            {
                var document = searcher.Doc(hits.ScoreDocs[i].doc);
                var value = document.Get(highlightField);
                var node = umbracoHelper.TypedContent(document.Get("id"));
                var highlightText = string.IsNullOrEmpty(value)
                    ? string.Empty
                    : luceneHighlighter.GetHighlight(value, highlightField);

                highLightContentList.Add(new HighlightContentViewModel
                {
                    Content = node,
                    HighlightText = highlightText
                });
            }

            return new HighlightContentSearchResultsViewModel
            {
                Count = hits.TotalHits,
                SearchTerm = searchTerm,
                Results = highLightContentList
            };
        }
        finally
        {
            // Release the index files even when parsing or searching throws.
            searcher.Dispose();
        }
    }

    /// <summary>
    /// Appends a fuzzy operator to a single-word term; multi-word terms are returned unchanged.
    /// </summary>
    private string AddFuzziness(string term, float fuzzyScore = 0.5f)
    {
        if (term.Contains(" "))
        {
            return term;
        }

        // Format with the invariant culture: a culture using ',' as decimal separator
        // would otherwise emit "term~0,5", which is not valid query syntax.
        var similarity = fuzzyScore.ToString(System.Globalization.CultureInfo.InvariantCulture);
        return $"{term}~{similarity}";
    }

    /// <summary>
    /// Runs a prefix query on <paramref name="field"/> and collects the "suggestion" field of each hit.
    /// </summary>
    /// <param name="searchProvider">Index set name prefix; "IndexSet" is appended</param>
    /// <param name="prefix">The text typed so far</param>
    /// <param name="field">The index field to run the prefix query against</param>
    /// <param name="maxSuggestions">Maximum number of documents to inspect</param>
    /// <returns>Distinct suggestions ordered by length; errors are logged and yield whatever was collected so far</returns>
    public IList<string> AutoComplete(string searchProvider, string prefix, string field, int maxSuggestions)
    {
        IList<string> suggestions = new List<string>();
        IndexSearcher searcher = null;
        try
        {
            var searchIndexSet = IndexSets.Instance.Sets[$"{searchProvider}IndexSet"];
            var dirInfo = new DirectoryInfo($"{searchIndexSet.IndexDirectory.FullName}\\Index");
            var directory = FSDirectory.Open(dirInfo);
            searcher = new IndexSearcher(directory, true);
            var stdAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
            var queryParser = new QueryParser(Version.LUCENE_29, field, stdAnalyzer);
            var query = queryParser.Parse($"{prefix}*");
            var topDocs = searcher.Search(query, maxSuggestions);
            foreach (var doc in topDocs.ScoreDocs)
            {
                var document = searcher.Doc(doc.doc);
                var suggestion = document.Get("suggestion");
                if (suggestion == null)
                {
                    // A document without the field must not abort the whole lookup.
                    continue;
                }
                suggestions.Add(suggestion.Replace("_", "").Replace(" ", " ").Trim());
            }
        }
        catch (Exception exception)
        {
            _logger.Error(GetType(), "An error occurred: ", exception);
        }
        finally
        {
            searcher?.Dispose();
        }
        return suggestions.Distinct().OrderBy(x => x.Length).ToList();
    }
}
4. Create IContentSearchService interface
using System.Threading.Tasks;
using subaru.com.au.web.ViewModels.Search;
using Umbraco.Web;
/// <summary>
/// Populates the content section of a composite search result model.
/// </summary>
public interface IContentSearchService
{
    /// <summary>
    /// Runs the content search and stores its results on <paramref name="toModel"/>.
    /// </summary>
    /// <param name="toModel">The composite model that receives the content results</param>
    /// <param name="searchTerm">The text entered by the user</param>
    /// <param name="umbracoHelper">Helper used to resolve published content</param>
    /// <param name="offSet">Index of the first hit to return</param>
    /// <returns>A task representing the operation</returns>
    Task BuildSearchAsync(
        CompositeSearchResultsViewModel toModel,
        string searchTerm,
        UmbracoHelper umbracoHelper,
        int offSet);
}
5. Create ContentSearchService that implements the IContentSearchService
using System.Threading.Tasks;
using subaru.com.au.web.Services.Search.Content.Mappers;
using subaru.com.au.web.ViewModels.Search;
using subaru.com.au.web.ViewModels.Search.Content;
using Umbraco.Web;
/// <summary>
/// Runs the highlighted content search against the "External" index and fills the
/// content section of the composite search model, including pagination.
/// </summary>
class ContentSearchService : IContentSearchService
{
    // Page size used for both the search call and the pagination.
    private const int MaxDocs = 10;

    private readonly IExamineContentSearchService _examineContentSearchService;
    private readonly IContentSearchViewModelMapper _mapper;
    private readonly IContentSearchPaginationService _paginationService;

    public ContentSearchService(IExamineContentSearchService examineContentSearchService,
        IContentSearchViewModelMapper mapper, IContentSearchPaginationService paginationService)
    {
        _paginationService = paginationService;
        _mapper = mapper;
        _examineContentSearchService = examineContentSearchService;
    }

    /// <summary>
    /// Searches the "searchableField" field of the "External" index and assigns the mapped
    /// results, hit count and page links to <paramref name="toModel"/>.
    /// </summary>
    /// <returns>An already-completed task; the work is performed synchronously</returns>
    public Task BuildSearchAsync(CompositeSearchResultsViewModel toModel, string searchTerm, UmbracoHelper umbracoHelper, int offSet)
    {
        var found = _examineContentSearchService.Search(
            "External", searchTerm, umbracoHelper, "searchableField", MaxDocs, offSet);

        var contentResults = new ContentSearchResultsViewModel
        {
            Results = _mapper.Map(found.Results, umbracoHelper),
            SearchTerm = searchTerm,
            Count = found.Count,
            Pages = _paginationService.CreatePagination(found.Count, offSet, MaxDocs, searchTerm)
        };
        toModel.ContentSearchResults = contentResults;

        return Task.FromResult<object>(null);
    }
}
6. Create IContentSearchViewModelMapper interface
using System.Collections.Generic;
using Examine;
using subaru.com.au.web.ViewModels.Search.Content;
using Umbraco.Web;
/// <summary>
/// Converts highlighted search hits into the view models rendered on the results page.
/// </summary>
public interface IContentSearchViewModelMapper
{
    /// <summary>
    /// Maps raw highlighted hits to result item view models.
    /// </summary>
    /// <param name="searchResult">The hits returned by the search service</param>
    /// <param name="umbracoHelper">Helper used to resolve published content</param>
    /// <returns>The mapped result items</returns>
    IList<ContentSearchResultItemViewModel> Map(
        IList<HighlightContentViewModel> searchResult,
        UmbracoHelper umbracoHelper);
}
7. Create ContentSearchViewModelMapper that implements IContentSearchViewModelMapper
using System.Collections.Generic;
using System.Linq;
using Examine;
using subaru.com.au.web.ViewModels.Search.Content;
using Umbraco.Web;
/// <summary>
/// Maps highlighted search hits to result item view models, keeping only hits that live
/// under the same level-1 ancestor as the page currently being rendered.
/// </summary>
class ContentSearchViewModelMapper : IContentSearchViewModelMapper
{
    /// <summary>
    /// Maps each hit whose level-1 ancestor has the same name as the current page's level-1 ancestor.
    /// </summary>
    /// <param name="searchResult">The hits to map; null is treated as an empty list</param>
    /// <param name="umbracoHelper">Helper used to resolve the current page and each hit's path nodes</param>
    /// <returns>The mapped items; hits without resolved content are skipped</returns>
    public IList<ContentSearchResultItemViewModel> Map(IList<HighlightContentViewModel> searchResult, UmbracoHelper umbracoHelper)
    {
        var contentList = new List<ContentSearchResultItemViewModel>();
        if (searchResult == null)
        {
            return contentList;
        }

        var rootNode = umbracoHelper.AssignedContentItem.Ancestor(1);
        foreach (var item in searchResult)
        {
            // The search service stores whatever TypedContent returned, which may be null
            // when an indexed document no longer resolves to published content.
            var content = item?.Content;
            if (content == null)
            {
                continue;
            }

            if (content.Ancestor(1)?.Name != rootNode.Name)
            {
                continue;
            }

            // Resolve every id in the node's path to its name to build a breadcrumb;
            // ids that do not resolve to published content are dropped.
            var pathNames = umbracoHelper.TypedContent(content.Path.Split(','))
                .Where(x => x != null)
                .Select(x => x.Name)
                .ToList();

            contentList.Add(new ContentSearchResultItemViewModel()
            {
                Id = content.Id,
                Url = content.Url,
                Title = content.Name,
                ContentExcerpt = item.HighlightText,
                Path = string.Join(" > ", pathNames)
            });
        }
        return contentList;
    }
}
8. ViewModel classes
using System.Collections.Generic;
/// <summary>
/// View model for a page of content search results.
/// </summary>
public class ContentSearchResultsViewModel
{
    /// <summary>The search text these results belong to.</summary>
    public string SearchTerm { get; set; }

    /// <summary>The result items to display.</summary>
    public IList<ContentSearchResultItemViewModel> Results { get; set; }

    /// <summary>The hit count reported for the search.</summary>
    public long Count { get; set; }

    /// <summary>The pagination links.</summary>
    public IList<Page> Pages { get; set; }
}
/// <summary>
/// View model for a single content search result.
/// </summary>
public class ContentSearchResultItemViewModel
{
    /// <summary>The excerpt shown for the result (holds the highlighted text).</summary>
    public string ContentExcerpt { get; set; }

    /// <summary>The document type of the content.</summary>
    public string DocType { get; set; }

    /// <summary>The content id.</summary>
    public int Id { get; set; }

    /// <summary>The breadcrumb-style path of the content.</summary>
    public string Path { get; set; }

    /// <summary>The title shown for the result.</summary>
    public string Title { get; set; }

    /// <summary>The URL of the content.</summary>
    public string Url { get; set; }
}
/// <summary>
/// View model for one entry in the pagination control.
/// </summary>
public class Page
{
    /// <summary>The number of the page.</summary>
    public int PageNumber { get; set; }

    /// <summary>The text displayed for the page link.</summary>
    public string DisplayText { get; set; }

    /// <summary>The URL the page link points to.</summary>
    public string PageUrl { get; set; }

    /// <summary>Whether this page is marked as selected.</summary>
    public bool IsSelected { get; set; }
}
/// <summary>
/// Result of a highlighted search: the returned hits plus the hit count.
/// </summary>
public class HighlightContentSearchResultsViewModel
{
    /// <summary>The search text these results belong to.</summary>
    public string SearchTerm { get; set; }

    /// <summary>The hit count reported for the search.</summary>
    public long Count { get; set; }

    /// <summary>The returned hits.</summary>
    public IList<HighlightContentViewModel> Results { get; set; }
}
/// <summary>
/// A single search hit: the published content together with its highlighted excerpt.
/// </summary>
public class HighlightContentViewModel
{
    /// <summary>The published content for the hit.</summary>
    public IPublishedContent Content { get; set; }

    /// <summary>The highlighted excerpt for the hit.</summary>
    public string HighlightText { get; set; }
}
Hi Hendy, I have been looking at the GitHub page trying to understand the concept of the package, but I'm struggling a bit. Would you be so kind as to explain what I have to do to get highlighting working?
What steps do I need to do except installing the dll file. When trying to create a new LookQuery in razor I'm missing a reference.
Once you've installed the NuGet package, the namespaces required are:
using Our.Umbraco.Look.Services;
using Our.Umbraco.Look.Models;
To index some text (inside a startup event)
// Register the callback that supplies the text Look should index for each item.
LookService.SetTextIndexer(indexingContext =>
{
    // The item being indexed: content, media or member.
    IPublishedContent content = indexingContext.Item;

    // Do whatever is needed to get the text to index for this content, for example:
    //  - hit the website and scrape the markup, or
    //  - combine some text properties.
    return "some text to be indexed";
});
To build a query:
// Build a query that searches the Look text field and asks for highlight fragments.
var lookQuery = new LookQuery
{
    TextQuery =
    {
        SearchText = "my search text",
        GetHighlight = true
    }
};
To execute a query:
// Run the query, then project each returned match to its Highlight value.
var lookResult = LookService.Query(lookQuery);
var highlights = lookResult.Select(x => x.Highlight);
HTH,
Hendy
(just to add, if you want to extend an existing query, this can be passed through via the RawQuery property on the LookQuery obj)
I'm trying to create a new LookQuery in the razor code but I'm missing a reference for it.
In order for me to understand the process:
I have created a class hooking in to the startup event:
using Our.Umbraco.Look.Services;
using Our.Umbraco.Look.Models;
using Umbraco.Core;
using Umbraco.Core.Models;
namespace PyramidWebsite.Helpers
{
    /// <summary>
    /// Registers the Look text indexer when the Umbraco application has started.
    /// </summary>
    public class ConfigureIndexing : ApplicationEventHandler
    {
        /// <summary>
        /// Umbraco has started event
        /// </summary>
        protected override void ApplicationStarted(UmbracoApplicationBase umbracoApplication, ApplicationContext applicationContext)
        {
            LookService.SetTextIndexer(indexingContext =>
            {
                // The item being indexed: content, media or member.
                IPublishedContent content = indexingContext.Item;

                // Do whatever is needed to get the text to index for this content, for example:
                //  - hit the website and scrape the markup, or
                //  - combine some text properties.
                return "some text to be indexed";
            });
        }
    }
}
Then in the razor view I'm trying to create the new LookQuery. Is this the right approach? I guess I need to have a reference to created class that hooks into the startup event?
Your indexing event looks fine (you can use LUKE.net to see what's in a Lucene index).
To reference the LookQuery model from your razor script, you could add a using directive to the view, or add the namespace to the web.config inside the Views folder. (If your razor view is in a different project from your event handler, make sure the Look NuGet package is installed in both projects.)
I'm getting a missing reference for "Our". Isn't that really strange? I can reference "Our.Umbraco.Look.Services;" from the class but not from the razor view.
A couple of more questions if you don't mind? If you do, simply ignore them... :D
Will Look update the indexed content as Umbraco normally does? On publish and so forth?
I have created a "SetTextIndexer" with one of my own fields. It picks up the content as expected. This means the settings in ExamineIndex.config will be redundant, right? All data that I want indexed with Look has to be added with a "LookService"?
Right now I'm receiving text in a raw format, including html tags. How do I get rid of these but still keep the tag highlighting the matched string?
Also, before I tried to get highlighting working I had a raw Lucene string. Would this work with Look?
I cant get my original lucene search string to work.
I have this LookService:
// Supplies the text Look indexes for items handled by the "KundUnikumIndexer" indexer:
// the page title and the "A" text joined by a space. Items from other indexers are skipped.
LookService.SetTextIndexer(indexingContext => {
    // IPublishedContent of the content, media or member being indexed
    var item = indexingContext.Item;
    // string name of the Examine indexer
    var indexerName = indexingContext.IndexerName;
    if (indexerName == "KundUnikumIndexer")
    {
        //return item.Parent.Name + " " + item.Name;
        // A property without a value yields null; fall back to an empty string
        // instead of throwing a NullReferenceException during indexing.
        string pageTitle = item.GetPropertyValue("pageTitleSearch")?.ToString() ?? string.Empty;
        string pageTextA = item.GetPropertyValue("aTextSearch")?.ToString() ?? string.Empty;
        string getText = pageTitle + " " + pageTextA;
        return getText;
    }
    return null; // don't index
});
Then in my razor view:
// Read the search term; a missing "query" parameter is treated as an empty search.
var searchTerm = (Request.QueryString["query"] ?? string.Empty).Trim();
var lookQuery = new LookQuery("KundUnikumSearcher");
// NOTE: luceneString is only built here; it is never assigned to lookQuery.RawQuery,
// so it has no effect on the query executed below.
var luceneString = "pageTitleSearch:" + searchTerm + "*";
luceneString += " aTextSearch:" + searchTerm + "*";
luceneString += " FileTextContent:" + searchTerm + "*";
luceneString += " updateDocsPyramidVersion:" + searchTerm + "*";
lookQuery.TextQuery.SearchText = searchTerm;
lookQuery.TextQuery.GetHighlight = true;
var lookResult = LookService.Query(lookQuery);
var highlights = lookResult.Select(x => x.Highlight);
foreach (var item in highlights)
{
    if (item == null)
    {
        // No highlight was produced for this result.
        continue;
    }
    // Strip every HTML tag except <strong> and </strong>. The previous pattern
    // "<[^strong>]*>" was a negated character class, so it skipped any tag containing
    // one of the letters s, t, r, o, n or g (for example <span> or <table>).
    var text = Regex.Replace(item.ToString(), @"<(?!/?strong\b)[^>]*>", string.Empty);
    var htmlString = new HtmlString(text);
    <p>@htmlString</p>
}
But from what I understand, it's not possible to have the highlight on "RawQuery"?
I would like to be able to search for multiple fields with wildcard endings. Preferably showing each node with the highlighted string. If that makes any sense at all...
I'm not really sure how to get the "GetHighlight" on a "RawQuery"?
In the code above you are not including your LuceneString as a raw query:
// Assign the hand-built Lucene string to the query's RawQuery property.
lookQuery.RawQuery = luceneString;
Also, the lookResult.Select(x => x.Highlight) will return just the highlights (you'll probably need the IPublishedContent back to do something with it ?) You can see all data returned if you do lookResult.ToArray();
However the highlight text returned will always be extracted from the Look indexed TextField rather than from many separate fields.
btw, why not index all the separate fields you want to use as a source for text highlighting into the Text index then you won't need the raw query at all ? (you can use wild cards with the TextQuery.SearchText)
Yeah, the markup is confusing, I left the ”luceneString” in order to show what the string looked like.
I did try to add the luceneString with a raw query ”lookQuery.RawQuery = luceneString;” but I was unable to get any rendered result with highlight. lookQuery.RawQuery.GetHighlight = true;
I will take a look at it again and se if I can sort it out. Once again, thank you so much for your time!
My raw Lucene query with a wildcard ending. This gives correct results when searching, e.g. searching for "för" gets hits on "första", so the wildcard is working.
// Prefix query against the Look_Text field: the search term followed by '*'.
var luceneSearchTerm = "Look_Text:" + searchTerm + "*";
Now, declaring the RawQuery.
// Assign the wildcard query string to the query's RawQuery property.
lookQuery.RawQuery = luceneSearchTerm;
Then the getting the results with LookService:
// Execute the query through Look.
var lookResult = LookService.Query(lookQuery);
Now I can do:
// Read the total from the result, then materialise the matches so each one can be rendered.
var totalResults = lookResult.Total;
var results = lookResult.ToArray();
// Gives me the total results
@totalResults
// Getting each item in the lookResult array
foreach (var item in results)
{
<p>@item.Name</p> // Gives me the Name
<p>@item.Item</p> // Gives me the IPublishedContent id
<p>@item.Highlight</p> // No highlight available?
<p>@item.Score</p> // Receives score value of "NAN"?
<p>@item.Text</p> // No text is available?
}
With @item.Item IPublishedContent I'm able to get my properties but what about highlighting and score?
I'm really confused about the RawQuery vs. the TextQuery. According to the docs, the RawQuery has no "SearchText", "GetHighlight" or "GetText", so how is it possible to retrieve a highlight using RawQuery? Also, receiving a score value of "NaN" seems odd?
The Raw Query when set becomes part of the query specifying the results to return. However the highlight text is always taken from the custom Look Text field that you populated with your indexer.
For a highlight to be returned you'll need to specify a Text Query eg.
// A TextQuery must be supplied for highlights to be returned: its SearchText is the text
// used for the highlighting sub-queries.
var lookQuery = new LookQuery()
{
    TextQuery = new TextQuery()
    {
        SearchText = "my text to find highlights with",
        // Property name corrected from the misspelled "GetHightlight"; the other
        // snippets in this thread set TextQuery.GetHighlight.
        GetHighlight = true
    }
};
I've never tried putting the Look Text field into a raw query, but that could be something that's changed in Look to make that work...
I was just about to raise an issue on GitHub where a RawQuery + TextQuery.GetHighlight = true, should return highlights but then realised why the TextQuery.SearchText value must be set, as it is this text value that's required for the highlighting sub queries.
So to distinguish between RawQuery and TextQuery - A RawQuery is added into the search criteria, but Look doesn't really know anything about it. A TextQuery tells Look to search for text, and also uses the same text for the highlighting sub-queries.
I wrote an extension for the Examine.SearchResult class that can be used to get the highlight easily:
/// <summary>
/// Returns a summary fragment of the given field with the search keywords highlighted.
/// </summary>
/// <param name="result">The search result whose fields are inspected</param>
/// <param name="fieldName">The name of the field to highlight (eg. 'bodyText')</param>
/// <param name="highlighter">The Lucene highlighter instance that produces the fragment</param>
/// <returns>The highlighted field contents, or null when the field is absent or has a null value</returns>
public static string GetHighlightForField(this SearchResult result, string fieldName, LuceneHighlighter highlighter)
{
    string contents;
    var hasField = result.Fields.TryGetValue(fieldName, out contents);
    if (!hasField || contents == null)
    {
        return null;
    }

    return highlighter.GetHighlight(contents, fieldName);
}
Umbraco Examine - Search result highlighting
Hi guys,
I'm trying implement the search result highlighting (like Google) within an Umbraco web app. I followed this https://our.umbraco.org/forum/developers/extending-umbraco/13571-Umbraco-Examine-Search-Results-Highlighting, however it's 8 years old and I want to target multiple fields with fuzzy search so below is my code:
Everything seems to be working fine, except that I can't get a token stream no matter how many different methods from different classes I try, and therefore no frags are returned. I then looked at the Lucene.Net source code at https://lucenenet.apache.org/docs/3.0.3/df/d43/tokensources8cssource.html and found that the method GetTokenStream will throw an ArgumentException (see image below) if the "description" field I use above is not a TermPositionVector. I got exactly this exception when I debugged it. How do I fix this issue?
I use default ExternalSearcher & ExternalIndexSet provided by Umbraco (7.7.6) to index & query content within BackOffice.
Thanks.
TP
Update.
I used Lucene Luke to examine the index Umbraco created and found that the description field has the Term Vector option ticked, but not positions or offsets (see image below). That means Umbraco Examine only knows the number of occurrences, not the positions and offsets, which are required to get the token stream I mentioned in the initial post. Reference: http://makble.com/what-is-term-vector-in-lucene
Can anyone shed some light on how to fix this? Thanks.
Can anyone help please as our client really wants to have this feature when they decommission Google search plugin?
Here's how I do syntax highlighting in Lucene:
First, add a reference to the NuGet package Lucene.Net.Contrib 2.9.4.1 (ensure it's the 2.9.4.1 version and not latest).
Then I have the following class with various methods to generate highlighting:
Thanks heaps Dan, I'll try it and let you know how it goes.
Hi Dan,
Woohoo, it's working. Thank you very much :).
By the way, I found that my code looked pretty much the same as yours except for the parameter passed to the QueryScorer. Mine did not call the .Rewrite method, which turned out to be the root of the issue. Again, thank you.
Hi Dan/Thanh,
Could you show me how you called the LuceneHighlighter class using razor on your search/search results page?
Thanks Jonny
Hi Thanh, would you mind sharing the steps you took to get the highlighting working? I would really appreciate it. I have the required packages but have not been able to get it working.
Cheers /David
I've uploaded complete code here and hope it can help anyone who wants to implement the search highlight.
1. Create LuceneHighlighter class
2. Create IExamineContentSearchService interface
3. Create ExamineContentSearchService class that implements the IExamineContentSearchService interface
4. Create IContentSearchService interface
5. Create ContentSearchService that implements the IContentSearchService
6. Create IContentSearchViewModelMapper interface
7. Create ContentSearchViewModelMapper that implements IContentSearchViewModelMapper
8. ViewModel classes
9. Razor view
I left out the way I did DI for the sake of simplicity. Hope this helps.
Awsome Thanh, I will try it out and let you know.
/David
Hi David, (this is a bit of a shameless plug) but the Look package might be useful, as it will do text highlighting on data stored in Examine.
Yeah, I have thought of trying it out and I think you just pushed me in that direction even further. :D
Thanks.
Hi Hendy, I have been looking at the github page trying to understand the concept of the package but I'm struggling a bit. Would you be so kind and explain what I have to do getting highlighting working.
What steps do I need to do except installing the dll file. When trying to create a new LookQuery in razor I'm missing a reference.
Best regards /David
Hi David, what is the missing reference error ?
Once you've installed the NuGet package, the namespaces required are:
To index some text (inside a startup event)
To build a query:
To execute a query:
HTH, Hendy
(just to add, if you want to extend an existing query, this can be passed through via the RawQuery property on the LookQuery obj)
Thanks, please bear with me Hendy...
I'm trying to create a new LookQuery in the razor code but I'm missing a reference for it.
In order for me to understand the process:
I have created a class hooking in to the startup event:
Then in the razor view I'm trying to create the new LookQuery. Is this the right approach? I guess I need to have a reference to created class that hooks into the startup event?
Your indexing event looks fine (you can use LUKE.net to see what's in a Lucene index).
To reference the LookQuery model from your razor script, you could add a using directive to the view, or add the namespace to the web.config inside the Views folder. (If your razor view is in a different project from your event handler, make sure the Look NuGet package is installed in both projects.)
Okey, but I can't find the reference name I should use in the razor view? :/
does the following work ?
I'm getting a missing reference for "Our". Isn't that really strange? I can reference "Our.Umbraco.Look.Services;" from the class but not from the razor view.
I get a missing reference for "Our". Isn't that strange? I can reference "Our.Umbraco.Look.Services;" from the class but not from the razor view.
Is the razor view in a different vs.net project to the class file ?
nope. :)
UPDATE! After restarting VS I'm able to reference the class.
Great Hendy, I've got it working now.
A couple of more questions if you don't mind? If you do, simply ignore them... :D
Will Look update the indexed content as Umbraco normally does? On publish and so forth?
I have created a "SetTextIndexer" with one of my own fields. It picks up the content as expected. This means the settings in ExamineIndex.config will be redundant, right? All data that I want indexed with Look has to be added with a "LookService"?
Right now I'm receiving text in a raw format, including html tags. How do I get rid of these but still keep the tag highlighting the matched string?
Also, before I tried to get highlighting working I had a raw Lucene string. Would this work with Look?
var luceneString = "pageTitleSearch:" + searchTerm + ""; luceneString += " aTextSearch:" + searchTerm + ""; luceneString += " FileTextContent:" + searchTerm + ""; luceneString += " updateDocsPyramidVersion:" + searchTerm + "";
var query = searchCriteria.RawQuery(luceneString); var searchResults = searcher.Search(query);
Thank you so much for your help Hendy!
1) Yes, it automatically hooks into all Umbraco Examine indexes
2) Correct, all Look indexers are added as you have done - so config fields are not necessary
3) If you want to extract text from markup, this could be done in a number of ways: a RegEx, or perhaps using the HtmlAgilityKit...
Great, what about the luceneString? That would be my last question... I swear...
4) to make look extend your original query you can do the following:
This will take your original raw query and append any Look query criteria on top of it.
Awesome!
Okay, so I lied. I feel ashamed.
I can't get my original Lucene search string to work.
I have this LookService:
Then in my razor view:
But from what I understand it's not possible to have the highlight on "RawQuery"?
I would like to be able to search for multiple fields with wildcard endings. Preferably showing each node with the highlighted string. If that makes any sense at all...
I'm not really sure how to get the "GetHighlight" on a "RawQuery"?
/David
Hi David,
In the code above you are not including your LuceneString as a raw query:
Also, the lookResult.Select(x => x.Highlight) will return just the highlights (you'll probably need the IPublishedContent back to do something with it ?) You can see all data returned if you do lookResult.ToArray();
However the highlight text returned will always be extracted from the Look indexed TextField rather than from many separate fields.
HTH, Hendy
btw, why not index all the separate fields you want to use as a source for text highlighting into the Text index then you won't need the raw query at all ? (you can use wild cards with the TextQuery.SearchText)
Hi Hendy,
Yeah, the markup is confusing, I left the ”luceneString” in order to show what the string looked like.
I did try to add the luceneString with a raw query ”lookQuery.RawQuery = luceneString;” but I was unable to get any rendered result with highlight. lookQuery.RawQuery.GetHighlight = true;
I will take a look at it again and see if I can sort it out. Once again, thank you so much for your time!
Hi Hendy,
I'm confused now. This is my setup:
My Lucene raw query with a wildcard ending. This gives correct results when searching, e.g. searching for "för" gets hits on "första", so the wildcard is working.
Now, declaring the RawQuery.
Then getting the results with LookService:
Now I can do:
With @item.Item IPublishedContent I'm able to get my properties but what about highlighting and score?
I'm really confused about the RawQuery vs the TextQuery. According to the docs the RawQuery has no "SearchText", "GetHighlight" or "GetText", so how is it possible to retrieve a highlight using RawQuery? Also, receiving a score value of "NaN" seems odd?
Best regards /David
Hi David,
The Raw Query when set becomes part of the query specifying the results to return. However the highlight text is always taken from the custom Look Text field that you populated with your indexer.
For a highlight to be returned you'll need to specify a Text Query eg.
I've never tried putting the Look Text field into a raw query, but that could be something that's changed in Look to make that work...
HTH, Hendy
Hi David,
I was just about to raise an issue on GitHub where a RawQuery + TextQuery.GetHighlight = true, should return highlights but then realised why the TextQuery.SearchText value must be set, as it is this text value that's required for the highlighting sub queries.
So to distinguish between RawQuery and TextQuery - A RawQuery is added into the search criteria, but Look doesn't really know anything about it. A TextQuery tells Look to search for text, and also uses the same text for the highlighting sub-queries.
HTH, Thanks, Hendy
Hi Jonny,
I wrote an extension for the Examine.SearchResult class that can be used to get the highlight easily:
Hope that points you in right direction.
Thank you Dan. This is really helpful.
The part I am stuck on is building the actual Query to pass into the highlighter?? Any help would be welcome.
LuceneIndexer indexer = (LuceneIndexer)ExamineManager.Instance.IndexProviderCollection["ExternalIndexer"];
IndexSearcher searcher = new IndexSearcher(indexer.GetLuceneDirectory(), false);
var luceneQuery = new Query(); // how to build the query with the search keyword??
var highlighter = new LuceneHighlighter(searcher, luceneQuery, "text-warning");
Examine.SearchResult highlightResult = new SearchResult(); var summary = highlightResult.GetHighlightForField("bodyText", highlighter);
Have you looked at the docs at https://our.umbraco.com/Documentation/Reference/Searching/Examine/quick-start ?
Hey Jonny - did you get this working? Trying to do something similar.
O.
is working on a reply...