Copied to clipboard

Flag this post as spam?

This post will be reported to the moderators as potential spam to be looked at


  • Carlos Casalicchio 176 posts 736 karma points
    Aug 20, 2024 @ 23:38
    Carlos Casalicchio
    0

    Examine Index Search returns all results for letterCategory == 'A'

    Hi there,

    I have created an author Index with the following fields

           /// <summary>
            /// Turns <see cref="Author"/> instances into Examine <see cref="ValueSet"/>s.
            /// </summary>
            public class AuthorValueSetBuilder : IValueSetBuilder<Author>
            {
                /// <summary>
                /// Lazily produces one value set per author. The author's slug is used as the value-set id and
                /// "author" as the category.
                /// </summary>
                /// <param name="data">The authors to convert.</param>
                /// <returns>One <see cref="ValueSet"/> per entry in <paramref name="data"/>, in the same order.</returns>
                public IEnumerable<ValueSet> GetValueSets(params Author[] data)
                {
                    foreach (var author in data)
                    {
                        // Nullable fields are stored as an empty string rather than null.
                        var fields = new Dictionary<string, object>
                        {
                            { "id", author.Id },
                            { "slug", author.Slug },
                            { "name", author.Name },
                            { "birthPlace", author.Birthplace ?? "" },
                            { "city", author.City ?? "" },
                            { "state", author.State ?? "" },
                            { "nationalityId", author.NationalityId ?? "" },
                            { "nationalityProfessionId", author.NationalityProfessionId ?? "" },
                            { "professionId", author.ProfessionId ?? "" },
                            { "letterCategory", author.LetterCategory.ToString() },
                        };

                        yield return new ValueSet(author.Slug, "author", fields);
                    }
                }
            }
    

    But, when I try to search by letter category, 'A' returns ALL the authors, but 'B', 'C', 'D', etc all return the correct results. Is this a bug in my code?

       /// <summary>
        /// Queries the authors index for entries whose <c>letterCategory</c> field matches <paramref name="letter"/>.
        /// </summary>
        /// <param name="letter">The letter category to search for.</param>
        /// <param name="totalItemCount">
        /// Receives the total item count reported by the search; stays 0 when the index cannot be resolved.
        /// </param>
        /// <returns>The search results, or an empty sequence when the index is missing or nothing matched.</returns>
        public IEnumerable<ISearchResult> GetAuthorsFromLetterCategory(char letter, out long totalItemCount)
        {
            totalItemCount = 0;

            if (!_examineManager.TryGetIndex(AuthorsIndex.INDEX_NAME, out var authorsIndex))
            {
                return [];
            }

            // NOTE(review): letter 'A' has been observed to match every author. "a" is one of StandardAnalyzer's
            // default English stop words, so the letterCategory clause is presumably dropped from the parsed
            // query - confirm against the analyzer the index is actually using.
            var boostedLetter = letter.ToString().Boost(10f);
            var matches = authorsIndex.Searcher
                .CreateQuery()
                .Field("__IndexType", "author")
                .And()
                .Field("letterCategory", boostedLetter)
                .Execute();

            totalItemCount = matches.TotalItemCount;
            if (!matches.Any())
            {
                return [];
            }

            return matches;
        }
    

    A results B results C results index-fields a-count b-count c-count

    HELP!! :-(

  • Yakov Lebski 594 posts 2350 karma points
    Aug 21, 2024 @ 02:24
    Yakov Lebski
    0

    I believe that "A" is a stop word and is removed from the query; you can change the analyzer or change the stop-words dictionary.

  • Carlos Casalicchio 176 posts 736 karma points
    Aug 21, 2024 @ 23:54
    Carlos Casalicchio
    0

    Thank you Yakov Lebski,

    I did find some suggestions online, but none has worked so far. Has anyone encountered this situation before?

    Tried

    options.Analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48, CharArraySet.EMPTY_SET);
    
    
     options.Analyzer = new KeywordAnalyzer(); // made no difference either
    

    I also tried creating a custom analyzer:

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Util;
    
    namespace QuoteTab.Umbraco.Core.Analyzers
    {
        /// <summary>
        /// Analyzer that tokenizes with the standard tokenizer and lower-cases every token, but applies no
        /// stop-word filter, so single-letter terms such as "a" survive analysis.
        /// </summary>
        /// <remarks>
        /// Tokens are lower-cased so that the terms produced here line up with terms written by
        /// <c>StandardAnalyzer</c>, which also lower-cases. Without that step a query for "A" would look for the
        /// term "A" while the index holds "a".
        /// NOTE(review): terms that another analyzer already removed at index time cannot be matched by any
        /// query-time analyzer; the index presumably has to be rebuilt once this analyzer is in effect - confirm.
        /// </remarks>
        public class NoStopWordsAnalyzer : Analyzer
        {
            /// <summary>
            /// Builds the token pipeline for a field: standard tokenizer followed by a lower-case filter.
            /// </summary>
            /// <param name="fieldName">Name of the field being analyzed (not used to vary the pipeline).</param>
            /// <param name="reader">Source text to tokenize.</param>
            /// <returns>The tokenizer together with the final filtered token stream.</returns>
            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                // Use a standard tokenizer
                Tokenizer tokenizer = new StandardTokenizer(LuceneVersion.LUCENE_48, reader);

                // Normalize case, but deliberately add no stop word filter
                TokenStream tokenStream = new LowerCaseFilter(LuceneVersion.LUCENE_48, tokenizer);

                return new TokenStreamComponents(tokenizer, tokenStream);
            }
        }
    }
    

    I did register it with the custom index options:

    if (!string.IsNullOrEmpty(name) && name.Equals(AuthorsIndex.INDEX_NAME))
            {
                options.Analyzer = new NoStopWordsAnalyzer();
                options.FieldDefinitions = new FieldDefinitionCollection(
                new FieldDefinition("slug", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("name", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("letterCategory", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("birthPlace", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("city", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("state", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("professionId", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("nationalityId", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("nationalityProfessionId", FieldDefinitionTypes.FullTextSortable)
                );
                options.UnlockIndex = true;
                if (_settings.Value.LuceneDirectoryFactory == LuceneDirectoryFactory.SyncedTempFileSystemDirectoryFactory)
                {
                    // if this directory factory is enabled then a snapshot deletion policy is required
                    options.IndexDeletionPolicy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
                }
            }
    

    and configured the custom options in a composer

            builder.Services.ConfigureOptions<ConfigureIndexOptions>();
            builder.Services.AddSingleton<AuthorValueSetBuilder>();
            builder.Services.AddSingleton<IIndexPopulator, AuthorsIndexPopulator>();
            builder.Services.AddExamineLuceneIndex<AuthorsIndex, ConfigurationEnabledDirectoryFactory>(AuthorsIndex.INDEX_NAME);
    

    Nothing is working so far, and the BackOffice is showing StandardAnalyzer instead of the NoStopWordsAnalyzer I just created. index

    Any documentation I could look into? I have no idea what is wrong :(

  • Yakov Lebski 594 posts 2350 karma points
    Aug 22, 2024 @ 03:29
    Yakov Lebski
    0

    can you try

    var searcher = (BaseLuceneSearcher)index.Searcher;    
    var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48, CharArraySet.EMPTY_SET);
    
        var query = searcher.CreateQuery(null, BooleanOperation.And, analyzer, new Examine.Lucene.Search.LuceneSearchOptions() { });
    
  • Carlos Casalicchio 176 posts 736 karma points
    Aug 22, 2024 @ 19:25
    Carlos Casalicchio
    0

    I tried with new StandardAnalyzer(LuceneVersion.LUCENE_48, CharArraySet.EMPTY_SET); and also tried like this:

    /// <summary>
        /// Queries the authors index for entries whose <c>letterCategory</c> field matches <paramref name="letter"/>,
        /// passing a <c>NoStopWordsAnalyzer</c> to the query instead of relying on the parameterless
        /// <c>CreateQuery()</c>.
        /// </summary>
        /// <param name="letter">The letter category to search for.</param>
        /// <param name="totalItemCount">
        /// Receives the total item count reported by the search; stays 0 when the index cannot be resolved.
        /// </param>
        /// <returns>The search results, or an empty sequence when the index is missing or nothing matched.</returns>
        public IEnumerable<ISearchResult> GetAuthorsFromLetterCategory(char letter, out long totalItemCount)
        {
            totalItemCount = 0;

            if (!_examineManager.TryGetIndex(AuthorsIndex.INDEX_NAME, out var authorsIndex))
            {
                return [];
            }

            var letterText = letter.ToString();
            var queryAnalyzer = new NoStopWordsAnalyzer();
            var luceneSearcher = (BaseLuceneSearcher)authorsIndex.Searcher;
            var searchOptions = new Examine.Lucene.Search.LuceneSearchOptions() { };

            var matches = luceneSearcher
                .CreateQuery(null, BooleanOperation.And, queryAnalyzer, searchOptions)
                .Field("__IndexType", "author")
                .And()
                .Field("letterCategory", letterText)
                .Execute();

            totalItemCount = matches.TotalItemCount;
            if (!matches.Any())
            {
                return [];
            }

            return matches;
        }
    

    The results are returning 0 now, instead of all of them, so at least something changed! results for A

    Is it possible that I'm setting up my index options incorrectly?

    if (!string.IsNullOrEmpty(name) && name.Equals(AuthorsIndex.INDEX_NAME))
            {
                options.Analyzer = new NoStopWordsAnalyzer();
                options.FieldDefinitions = new FieldDefinitionCollection(
                new FieldDefinition("slug", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("name", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("letterCategory", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("birthPlace", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("city", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("state", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("professionId", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("nationalityId", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("nationalityProfessionId", FieldDefinitionTypes.FullTextSortable)
                );
                options.UnlockIndex = true;
                if (_settings.Value.LuceneDirectoryFactory == LuceneDirectoryFactory.SyncedTempFileSystemDirectoryFactory)
                {
                    // if this directory factory is enabled then a snapshot deletion policy is required
                    options.IndexDeletionPolicy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
                }
            }
    
  • Carlos Casalicchio 176 posts 736 karma points
    Aug 30, 2024 @ 20:52
    Carlos Casalicchio
    0

    I have made several attempts, using different approaches, and it's always the same, it works for other letters (B,C,D...) but not for A.

    for now I'm stuck :(

    Even tried this (just for reference, not actual solution)

    List<LetterCategorySearchResult> results = new();
            using (var directory = FSDirectory.Open(new DirectoryInfo("umbraco\\Data\\TEMP\\ExamineIndexes\\AuthorsIndex")))
            using (var analyzer = new NoStopWordsAnalyzer()) // Use your custom analyzer here
            {
                // Create a query parser
                var queryParser = new QueryParser(LuceneVersion.LUCENE_48, "letterCategory", analyzer);
    
                // Parse the query
                Query query = queryParser.Parse(letter.ToString());
    
                // Set up the searcher
                using (var reader = DirectoryReader.Open(directory))
                {
                    var searcher = new IndexSearcher(reader);
    
                    // Execute the search
                    TopDocs topDocs = searcher.Search(query, 9999999);
                    foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
                    {
                        Document doc = searcher.Doc(scoreDoc.Doc);
                        LetterCategorySearchResult searchResult = new(doc, scoreDoc.Score);
    
                        results.Add(searchResult);
                    }
                }
    
                return results;
            }
    

    It also doesn't seem to matter which type of Analyzer I configure for the index; Umbraco always displays it as StandardAnalyzer. Does that mean the backoffice has a bug and doesn't show the correct analyzer, OR that Umbraco has a bug and doesn't pick up the custom analyzer?

    backoffice code for whitespace analyzer

    And also like this (with no success)

                if (!string.IsNullOrEmpty(name) && name.Equals(AuthorsIndex.INDEX_NAME))
            {
                options.Analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48, new CharArraySet(LuceneVersion.LUCENE_48, 0, true));
                options.FieldDefinitions = new FieldDefinitionCollection(
                new FieldDefinition("name", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("birthPlace", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("city", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("professionId", FieldDefinitionTypes.FullTextSortable),
                new FieldDefinition("nationalityId", FieldDefinitionTypes.FullTextSortable)
                );
                options.UnlockIndex = true;
                if (_settings.Value.LuceneDirectoryFactory == LuceneDirectoryFactory.SyncedTempFileSystemDirectoryFactory)
                {
                    // if this directory factory is enabled then a snapshot deletion policy is required
                    options.IndexDeletionPolicy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
                }
            }
    

    And the search

    /// <summary>
        /// Queries the authors index for entries whose <c>letterCategory</c> field matches <paramref name="letter"/>,
        /// parsing the query with a <c>StandardAnalyzer</c> that has an empty stop-word set.
        /// </summary>
        /// <param name="letter">The letter category to search for.</param>
        /// <param name="totalItemCount">
        /// Receives the total item count reported by the search; stays 0 when the index cannot be resolved.
        /// </param>
        /// <returns>The search results, or an empty sequence when the index is missing or nothing matched.</returns>
        /// <exception cref="InvalidCastException">The index's searcher is not a <c>LuceneSearcher</c>.</exception>
        public IEnumerable<ISearchResult> GetAuthorsFromLetterCategory(char letter, out long totalItemCount)
        {
            totalItemCount = 0;
            //https://our.umbraco.com/forum/using-umbraco-and-getting-started/114576-examine-index-search-returns-all-results-for-lettercategory-a

            if (!_examineManager.TryGetIndex(AuthorsIndex.INDEX_NAME, out var index))
            {
                return [];
            }

            var searchTerm = letter.ToString();

            // Empty stop-word set, so the single-letter term "a" is not stripped from the query.
            var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48, new CharArraySet(LuceneVersion.LUCENE_48, 0, true));

            // Direct cast: a searcher of an unexpected type fails here with a descriptive InvalidCastException
            // instead of a NullReferenceException on the next line.
            var searcher = (LuceneSearcher)index.Searcher;
            var query = searcher
                .CreateQuery(null, BooleanOperation.Or, analyzer, new Examine.Lucene.Search.LuceneSearchOptions())
                .Field("__IndexType", "author")
                .And().Field("letterCategory", searchTerm)
                .Execute();

            totalItemCount = query.TotalItemCount;
            if (query.Any())
            {
                return query;
            }

            return [];
        }
    
  • Carlos Casalicchio 176 posts 736 karma points
    Sep 01, 2024 @ 02:21
    Carlos Casalicchio
    100

    Ended up using this FUGLY hack.... just in case anyone needs it, or finds a solution

    /// <summary>
        /// Builds the field values attached to search results: a single <c>__IndexType</c> entry whose only
        /// value is "author".
        /// </summary>
        /// <returns>A new dictionary instance on every call.</returns>
        private static IDictionary<string, List<string>> CreateFieldValues()
        {
            IDictionary<string, List<string>> fieldValues = new Dictionary<string, List<string>>();
            fieldValues.Add("__IndexType", new List<string> { "author" });
            return fieldValues;
        }
        private Func<IDictionary<string, List<string>>> lazyFieldVals = CreateFieldValues;
    
        /// <summary>
        /// Returns the authors filed under <paramref name="letter"/>.
        /// </summary>
        /// <remarks>
        /// Letter 'A' is read from the database as a temporary workaround, because the index query for that
        /// letter does not return the expected results. Every other letter is served from the authors index.
        /// </remarks>
        /// <param name="letter">The letter category to search for.</param>
        /// <param name="totalItemCount">
        /// Receives the number of authors found: the database row count for 'A', otherwise the total item count
        /// reported by the search. Stays 0 when the index cannot be resolved.
        /// </param>
        /// <returns>
        /// For 'A', results built from database rows and ordered by ascending score; otherwise the search
        /// results, or an empty sequence when the index is missing or nothing matched.
        /// </returns>
        /// <exception cref="InvalidCastException">The index's searcher is not a <c>LuceneSearcher</c>.</exception>
        public IEnumerable<ISearchResult> GetAuthorsFromLetterCategory(char letter, out long totalItemCount)
        {
            totalItemCount = 0;

            if (letter == 'A')
            {
                //Fugly hack until solution is found
                var authors = _quoteTabDbContext.Authors.Where(x => x.LetterCategory == letter).AsNoTracking().ToList();

                List<SearchResult> results = [];
                foreach (var author in authors)
                {
                    // NOTE(review): this issues one statistics query per author (N+1); consider loading all
                    // ranks for the letter in a single query once the workaround is revisited.
                    var stats = _quoteTabDbContext.AuthorStatistics.AsNoTracking().FirstOrDefault(x => x.Slug == author.Slug);

                    // Authors without a statistics row get a score of 0.
                    var score = stats?.Rank ?? 0f;
                    results.Add(new SearchResult(author.Id.ToString(), (float)score, lazyFieldVals));
                }

                totalItemCount = authors.Count;
                return results.OrderBy(x => x.Score);
            }

            //https://our.umbraco.com/forum/using-umbraco-and-getting-started/114576-examine-index-search-returns-all-results-for-lettercategory-a

            if (!_examineManager.TryGetIndex(AuthorsIndex.INDEX_NAME, out var index))
            {
                return [];
            }

            var searchTerm = letter.ToString();
            var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48, new CharArraySet(LuceneVersion.LUCENE_48, 0, true));

            // Direct cast: a searcher of an unexpected type fails here with a descriptive InvalidCastException
            // instead of a NullReferenceException on the next line.
            var searcher = (LuceneSearcher)index.Searcher;
            var query = searcher
                .CreateQuery(null, BooleanOperation.Or, analyzer, new Examine.Lucene.Search.LuceneSearchOptions())
                .Field("__IndexType", "author")
                .And().Field("letterCategory", searchTerm)
                .Execute();

            totalItemCount = query.TotalItemCount;
            if (query.Any())
            {
                return query;
            }

            return [];
        }
    
Please Sign in or register to post replies

Write your reply to:

Draft