Copied to clipboard

Flag this post as spam?

This post will be reported to the moderators as potential spam to be looked at


  • Gareth Wright 32 posts 100 karma points c-trib
    Apr 01, 2021 @ 08:42
    Gareth Wright
    0

    Examine and Custom Stop Words

    Has anyone been able to change the stop words used in Umbraco 8 Examine queries?

    I can't seem to get it to pick up my reduced stopwords charset.

    I was able to do this in Umbraco 7 by creating a custom analyzer, but it has no effect in 8.

    I've tested the search in the back-office and it finds the term "will" in both the index and the searcher.

    However when I run the query, it returns no results.

    Happy to provide any more information.

    Custom Analyzer

    /// <summary>
    /// A <see cref="StandardAnalyzer"/> subclass whose token streams are built from a
    /// <see cref="StandardTokenizer"/> followed by <see cref="StandardFilter"/>,
    /// <see cref="LowerCaseFilter"/>, <see cref="ASCIIFoldingFilter"/> and a
    /// <see cref="StopFilter"/> that uses the caller-supplied stop-word set.
    /// </summary>
    /// <remarks>
    /// NOTE(review): this analyzer is only handed to the searcher and to the query in the
    /// snippets of this post; presumably the index has to be built with the same analyzer
    /// for the reduced stop-word list to have any effect - confirm against how the
    /// indexes are registered.
    /// </remarks>
    public class CustomAnalyzer : StandardAnalyzer
    {
        private readonly Version _matchVersion;
        private readonly ISet<string> _stopWords;
        private readonly bool _enableStopPositionIncrements;

        /// <summary>
        /// Creates the analyzer with <c>Version.LUCENE_30</c> and a set copied from
        /// <c>Constants.SearchConstants.StopWords</c>.
        /// </summary>
        public CustomAnalyzer()
            : this(Version.LUCENE_30, new HashSet<string>(Constants.SearchConstants.StopWords))
        {
        }

        /// <summary>
        /// Creates the analyzer for the given Lucene version and stop-word set.
        /// </summary>
        /// <param name="matchVersion">Lucene version passed to the base analyzer and to the tokenizer.</param>
        /// <param name="stopWords">Words the trailing <see cref="StopFilter"/> is constructed with.</param>
        public CustomAnalyzer(Version matchVersion, ISet<string> stopWords)
            : base(matchVersion, stopWords)
        {
            _matchVersion = matchVersion;
            _stopWords = stopWords;
            _enableStopPositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
        }

        /// <summary>
        /// Builds a fresh tokenizer and filter chain over <paramref name="reader"/>.
        /// </summary>
        /// <param name="fieldName">Not used by this implementation.</param>
        /// <param name="reader">Source text to tokenize.</param>
        /// <returns>The outermost filter of the chain (the stop filter).</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            var tokenizer = new StandardTokenizer(_matchVersion, reader)
            {
                // Left side is the tokenizer's property, right side is this analyzer's.
                MaxTokenLength = MaxTokenLength
            };

            return BuildFilterChain(tokenizer);
        }

        /// <summary>
        /// Returns the tokenizer/filter chain cached in <c>PreviousTokenStream</c>,
        /// creating it on first use and otherwise resetting the cached tokenizer onto
        /// <paramref name="reader"/>.
        /// </summary>
        /// <param name="fieldName">Not used by this implementation.</param>
        /// <param name="reader">Source text to tokenize.</param>
        /// <returns>The cached outermost filter of the chain.</returns>
        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
        {
            var savedStreams = (SavedStreams)PreviousTokenStream;
            if (savedStreams == null)
            {
                var tokenizer = new StandardTokenizer(_matchVersion, reader);
                savedStreams = new SavedStreams
                {
                    TokenStream = tokenizer,
                    FilteredTokenStream = BuildFilterChain(tokenizer)
                };

                // Cache only once the chain is fully built, so a failed construction
                // cannot leave a half-initialised entry for the next call to trip over.
                PreviousTokenStream = savedStreams;
            }
            else
            {
                savedStreams.TokenStream.Reset(reader);
            }

            savedStreams.TokenStream.MaxTokenLength = MaxTokenLength;
            return savedStreams.FilteredTokenStream;
        }

        /// <summary>
        /// Wraps <paramref name="tokenizer"/> in the standard, lower-case, ASCII-folding
        /// and stop filters, in that order (innermost first).
        /// </summary>
        private TokenStream BuildFilterChain(StandardTokenizer tokenizer)
        {
            TokenStream chain = new StandardFilter(tokenizer);
            chain = new LowerCaseFilter(chain);
            chain = new ASCIIFoldingFilter(chain);
            return new StopFilter(_enableStopPositionIncrements, chain, _stopWords);
        }

        /// <summary>Holder for the cached tokenizer and the filter chain built on top of it.</summary>
        private sealed class SavedStreams
        {
            internal StandardTokenizer TokenStream;
            internal TokenStream FilteredTokenStream;
        }
    }
    

    Adding Searcher

    // Register a multi-index searcher that analyzes with CustomAnalyzer.
    var examineManager = ExamineManager.Instance;
    var multiSearcherName = _searchConfig.MultiIndexSearcherName;
    var multiSearcher = new MultiIndexSearcher(multiSearcherName, indexes, new CustomAnalyzer());
    examineManager.AddSearcher(multiSearcher);
    

    Query

    // Create a query on the multi-index searcher. The arguments are the string "Search",
    // BooleanOperation.And, a new CustomAnalyzer instance and default LuceneSearchOptions.
    // NOTE(review): "Search" is presumably the query category and And the default
    // boolean operation - confirm against the Examine CreateQuery signature.
    IQuery query = ((MultiIndexSearcher)searcher).CreateQuery(
                    "Search",
                    BooleanOperation.And,
                    new CustomAnalyzer(),
                    new LuceneSearchOptions());
      IBooleanOperation filter = null;
    // "..." marks code that was elided in the original post.
    ...
       // config.SearchFields is used as a format string taking currentCulture; the result
       // is split on ',' into field names, which are OR-grouped against the tokenized terms.
       filter = query.GroupedOr(string.Format(config.SearchFields, currentCulture).Split(','), request.TokenizedTerm.ToArray());
    ...
    // Run the query (the statement is left unterminated in the original post).
    filter.Execute()
    

    Stopwords

     /// <summary>
     /// Reduced stop-word list that is fed to <c>CustomAnalyzer</c>. It deliberately does
     /// not contain "will", the term the search is expected to find.
     /// </summary>
     /// <remarks>
     /// Declared <c>readonly</c> so the shared list instance cannot be swapped out by
     /// other code; the element type and contents are unchanged.
     /// </remarks>
     public static readonly List<string> StopWords = new List<string>
            {
                "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into",
                "is", "it", "no", "not", "of", "on", "or", "such", "that", "their", "then",
                "there", "these", "they", "this", "to", "was", "with"
            };
    

    Help please :)

  • Mathias Hove 11 posts 71 karma points
    May 20, 2021 @ 06:35
    Mathias Hove
    0

    Hi Gareth.

    Did you find a solution for this? I'm in the exact same situation.

    Overriding the stopwords does not seem to do the trick! :)

Please Sign in or register to post replies

Write your reply to:

Draft