I only get results for a single search term, not even a phrase with two terms. Looking at UmbracoExamine.XsltExtensions it boils down to the following:
if (provider == null)
throw new ArgumentNullException("provider");
var results = provider.Search(searchText,
useWildcards, indexType);
return GetResultsAsXml(results);
}
Is there something I need to set in the configuration? Just learning this Lucene stuff. XSLTSearch has been enough for most clients, but this one needed PDFs indexed. I must be having an ADD moment cause I can't seem to focus on which rabbit hole to jump down in.
Here are the final results to help folks on their journey. What it does: combines content and PDF search results utilizing Lucene in XSLT. Where it falls short: it lacks proper scoring, as it combines the two sets together as a string, which I then convert to a node-set to traverse through. Since this replaced XSLTSearch for me, you'll notice some of the same markup and style references. Hopefully I'll have some free time so I can continue the transformation and apply some of the same XSLTSearch parameters and enhancements to provide a fully functioning alternative.
You need to create a PDF Index and I don't recommend using the default index for content; create your own (for the example below, I'm using the default). So here's my XSLT:
/// <summary>
/// Searches the "PDFSearcher" provider for any of the space-separated terms in
/// <paramref name="searchTerm"/>, matching them against the "FileTextContent" field.
/// </summary>
/// <param name="searchTerm">Raw search text; it is split on spaces into individual terms.</param>
/// <returns>
/// The hits converted by GetResultsAsXml, or null when the search text is null, empty
/// or contains nothing but spaces.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The "PDFSearcher" provider is missing or is not an UmbracoExamineSearcher.
/// </exception>
public static XPathNodeIterator SearchPDF(string searchTerm)
{
    if (string.IsNullOrEmpty(searchTerm)) return null;

    // Drop empty entries so repeated, leading or trailing spaces do not become empty query terms.
    string[] terms = searchTerm.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    if (terms.Length == 0) return null;

    var searchProvider = "PDFSearcher";
    var provider = ExamineManager.Instance.SearchProviderCollection[searchProvider] as UmbracoExamineSearcher;
    if (provider == null)
    {
        // The "as" cast yields null for a missing or differently-typed provider; fail with a clear message.
        throw new InvalidOperationException("Search provider '" + searchProvider + "' is not configured or is not an UmbracoExamineSearcher.");
    }

    var criteria = provider.CreateSearchCriteria(BooleanOperation.Or);
    string[] fields = new string[] { "FileTextContent" };
    Examine.SearchCriteria.IBooleanOperation filter = null;

    // OR together one field/term clause for every combination.
    foreach (string field in fields)
    {
        foreach (string term in terms)
        {
            filter = (filter == null)
                ? criteria.Field(field, term)
                : filter.Or().Field(field, term);
        }
    }

    var results = provider.Search(filter.Compile());
    return GetResultsAsXml(results);
}
/// <summary>
/// Searches the default search provider (content index type) for any of the
/// space-separated terms in <paramref name="searchTerm"/> across a fixed list of fields.
/// </summary>
/// <param name="searchTerm">Raw search text; it is split on spaces into individual terms.</param>
/// <returns>
/// The hits converted by GetResultsAsXml, or null when the search text is null, empty
/// or contains nothing but spaces.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The default provider is missing or is not an UmbracoExamineSearcher.
/// </exception>
public static XPathNodeIterator Search(string searchTerm)
{
    if (string.IsNullOrEmpty(searchTerm)) return null;

    // Drop empty entries so repeated, leading or trailing spaces do not become empty query terms.
    string[] terms = searchTerm.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    if (terms.Length == 0) return null;

    var searchProvider = ExamineManager.Instance.DefaultSearchProvider.Name;
    var provider = ExamineManager.Instance.SearchProviderCollection[searchProvider] as UmbracoExamineSearcher;
    if (provider == null)
    {
        // The "as" cast yields null for a missing or differently-typed provider; fail with a clear message.
        throw new InvalidOperationException("Search provider '" + searchProvider + "' is not configured or is not an UmbracoExamineSearcher.");
    }

    var criteria = provider.CreateSearchCriteria(IndexTypes.Content, BooleanOperation.Or);
    string[] fields = new string[] { "metaKeywords", "metaDescription", "pageContent", "pageTitle", "nodeName" };
    Examine.SearchCriteria.IBooleanOperation filter = null;

    // OR together one field/term clause for every combination.
    foreach (string field in fields)
    {
        foreach (string term in terms)
        {
            filter = (filter == null)
                ? criteria.Field(field, term)
                : filter.Or().Field(field, term);
        }
    }

    var results = provider.Search(filter.Compile());
    return GetResultsAsXml(results);
}
/// <summary>
/// Converts search hits into an XML document of the form
/// nodes / node[@id, @score] / data[@alias] and returns an iterator selected at "/".
/// </summary>
/// <param name="results">The hits to convert; each hit's fields become data elements.</param>
/// <returns>An iterator over the document root of the generated XML.</returns>
private static XPathNodeIterator GetResultsAsXml(ISearchResults results)
{
    XElement nodes = new XElement("nodes");
    foreach (SearchResult hit in results)
    {
        XElement hitElement = new XElement("node",
            new XAttribute("id", hit.Id),
            new XAttribute("score", hit.Score));

        // Every field is emitted as <data alias="fieldName"><![CDATA[value]]></data>.
        foreach (KeyValuePair<String, String> pair in hit.Fields)
        {
            hitElement.Add(new XElement("data",
                new XAttribute("alias", pair.Key),
                new XCData(pair.Value)));
        }
        nodes.Add(hitElement);
    }
    return new XDocument(nodes).CreateNavigator().Select("/");
}
]]>
</msxsl:script>
</xsl:stylesheet>
XSLT Lucene results using multiple terms
I won't go into the whole story, so I'll get to my issue. Here's the snippet of logic I'm using to generate my results in case you need a visual:
<!-- Merge the content hits and the PDF hits into a single <results> fragment. -->
<xsl:variable name="resultsRaw">
  <results>
    <xsl:copy-of select="msxml:node-set(umbraco.examine:SearchContentOnly($searchTerm,true()))/nodes/node" />
    <xsl:copy-of select="msxml:node-set(umbraco.examine:Search($searchTerm,true(),'PDFSearcher'))/nodes/node" />
  </results>
</xsl:variable>
<!-- The variable above is a result tree fragment; convert it to a node-set so it can be traversed. -->
<xsl:variable name="results" select="msxml:node-set($resultsRaw)/results" />
I only get results for a single search term, not even a phrase with two terms. Looking at UmbracoExamine.XsltExtensions it boils down to the following:
/// <summary>
/// Runs a text search on the given provider and returns the hits via GetResultsAsXml.
/// </summary>
/// <param name="searchText">Search text, passed to the provider unchanged.</param>
/// <param name="useWildcards">Passed straight through to the provider's Search call.</param>
/// <param name="provider">Searcher to query; must not be null.</param>
/// <param name="indexType">Passed straight through to the provider's Search call.</param>
/// <exception cref="ArgumentNullException"><paramref name="provider"/> is null.</exception>
internal static XPathNodeIterator Search(string searchText, bool useWildcards, LuceneSearcher provider, string indexType)
{
    if (provider == null)
    {
        throw new ArgumentNullException("provider");
    }

    // searchText is handed over as a single string; no per-term splitting happens here.
    return GetResultsAsXml(provider.Search(searchText, useWildcards, indexType));
}
Is there something I need to set in the configuration? Just learning this Lucene stuff. XSLTSearch has been enough for most clients, but this one needed PDFs indexed. I must be having an ADD moment cause I can't seem to focus on which rabbit hole to jump down in.
Jon,
When doing a non-XSLT Examine query, you have to split the keyword on whitespace and then add each term to the query, e.g.:
// Multi-word keyword: split it into terms and add each term to the query separately.
if(qsValue.Contains(" "))
{
    // RemoveEmptyEntries: repeated, leading or trailing spaces would otherwise produce empty terms.
    string[] terms = qsValue.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    int termCount = 0;
    foreach (var term in terms)
    {
        // The first term is joined to the existing query with And(); the remaining terms are Or()-ed on.
        if(termCount==0){
            queryToBuild = queryToBuild.And().Field(key, term);
        }
        else{
            queryToBuild = queryToBuild.Or().Field(key, term);
        }
        termCount++;
    }
}
So you will need to update the XsltExtensions search code to do something similar.
This is what I got so far looking at some of your (@Ismail) past posts and others out there:
// First attempt: adds one GroupedOr clause per search term and runs the compiled
// query against the default search provider. Returns null for a null/empty search term.
public static XPathNodeIterator Search(string searchTerm)
{
if (string.IsNullOrEmpty(searchTerm)) return null;
var criteria = ExamineManager.Instance.CreateSearchCriteria(IndexTypes.Content);
// Splits on single spaces only, so consecutive spaces produce empty terms.
string[] terms = searchTerm.Split(' ');
// NOTE(review): fields is declared here but never used below.
string[] fields = new string[] { "nodeName", "pageTitle", "metaKeywords", "metaDescription", "bodyContent" };
Examine.SearchCriteria.IBooleanOperation filter = null;
int i = 0;
for (i = 0; i < terms.Length; i++)
{
// NOTE(review): the first argument passed to GroupedOr is the terms array, not fields.
// Presumably GroupedOr expects the field names first - confirm against the Examine API.
if (filter == null)
filter = criteria.GroupedOr(terms, terms[i]);
else
filter = filter.Or().GroupedOr(terms, terms[i]);
}
var searchProvider = ExamineManager.Instance.DefaultSearchProvider.Name;
// The "as" cast gives null if the provider is not a LuceneSearcher; the Search call below would then throw.
var provider = ExamineManager.Instance.SearchProviderCollection[searchProvider] as LuceneSearcher;
var results = provider.Search(filter.Compile());
return GetResultsAsXml(results);
}
It's not generating any results. I read the GroupedOr may be bugged, so I'm thinking that's where I'm right, but wrong. Anyone have suggestions?
Can you write out the generated query just after
var results = provider.Search(filter.Compile());
Write out provider.ToString(); that should give you the generated query.
Regards
Ismail
provider.ToString() outputs => "UmbracoExamine.UmbracoExamineSearcher"
filter.ToString() outputs => "Examine.LuceneEngine.SearchCriteria.LuceneBooleanOperation"
I don't believe the output is what you are expecting.
Ok, I made some slight changes, however, I'm observing some interesting results, so I think it's the query build logic I'm struggling with.
Here's my method:
// Second attempt: ORs together one Field clause for every field/term combination
// and runs the compiled query against the default search provider.
// Returns null for a null/empty search term.
public static XPathNodeIterator Search(string searchTerm)
{
if (string.IsNullOrEmpty(searchTerm)) return null;
// NOTE(review): the criteria is created without an explicit default BooleanOperation;
// the poster's later fix in this thread passes BooleanOperation.Or here.
var criteria = ExamineManager.Instance.CreateSearchCriteria(IndexTypes.Content);
// Splits on single spaces only, so consecutive spaces produce empty terms.
string[] terms = searchTerm.Split(' ');
string[] fields = new string[] { "nodeName", "pageTitle", "metaKeywords", "metaDescription", "bodyContent" };
Examine.SearchCriteria.IBooleanOperation filter = null;
int i = 0;
foreach (string field in fields)
{
for (i = 0; i < terms.Length; i++)
{
// The very first clause starts the chain from the criteria; every later clause is appended with Or().
if (filter == null)
filter = criteria.Field(field, terms[i]);
else
filter = filter.Or().Field(field, terms[i]);
}
}
var searchProvider = ExamineManager.Instance.DefaultSearchProvider.Name;
// The "as" cast gives null if the provider is not a LuceneSearcher; the Search call below would then throw.
var provider = ExamineManager.Instance.SearchProviderCollection[searchProvider] as LuceneSearcher;
var results = provider.Search(filter.Compile());
return GetResultsAsXml(results);
}
I don't get any results; however, if I change the order of the fields I start getting results:
string[] fields = new string[] { "metaKeywords", "metaDescription", "pageContent", "pageTitle", "nodeName" };
I get some results. Now, if I limit the fields to just { "pageContent" } I get even more results. That ain't right.
I believe I figured it out. I needed to set the default BooleanOperation to Or:
var criteria = ExamineManager.Instance.CreateSearchCriteria(IndexTypes.Content, BooleanOperation.Or);
Here are the final results to help folks on their journey. What it does: combines content and PDF search results utilizing Lucene in XSLT. Where it falls short: it lacks proper scoring, as it combines the two sets together as a string, which I then convert to a node-set to traverse through. Since this replaced XSLTSearch for me, you'll notice some of the same markup and style references. Hopefully I'll have some free time so I can continue the transformation and apply some of the same XSLTSearch parameters and enhancements to provide a fully functioning alternative.
You need to create a PDF Index and I don't recommend using the default index for content; create your own (for the example below, I'm using the default). So here's my XSLT:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE xsl:stylesheet [ <!ENTITY nbsp " "> ]>
<xsl:stylesheet
version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:msxml="urn:schemas-microsoft-com:xslt"
xmlns:msxsl="urn:schemas-microsoft-com:xslt"
xmlns:umbraco.library="urn:umbraco.library"
xmlns:umbraco.examine="urn:umbraco.examine"
xmlns:umbraco.page="urn:umbraco.page"
exclude-result-prefixes="msxml umbraco.library umbraco.examine umbraco.page">
<xsl:output method="html" omit-xml-declaration="yes"/>
<xsl:param name="currentPage"/>
<!-- Renders the search form and, when a query is present, the merged content + PDF results. -->
<xsl:template match="/">
  <xsl:variable name="searchTerm" select="umbraco.library:RequestQueryString('q')" />
  <div id="xsltsearch">
    <form method="get">
      <input name="q" type="text" class="input" value="{umbraco.library:StripHtml($searchTerm)}" />
      <input type="submit" class="submit" value="Search"/>
    </form>
    <xsl:if test="string-length($searchTerm) > 0">
      <!-- Merge content hits and PDF hits into one fragment. -->
      <xsl:variable name="resultsRaw">
        <results>
          <!--
            GetResultsAsXml emits every indexed field as <data alias="...">, so the
            hideFromSearch flag has to be read from the matching data element. A bare
            hideFromSearch child never exists on result nodes, which made the previous
            predicate always true. This only filters when the hideFromSearch property is
            part of the index (TODO confirm against the index configuration).
          -->
          <xsl:for-each select="msxml:node-set(umbraco.page:Search($searchTerm))/nodes/node [string(data[@alias='hideFromSearch']) != '1']" >
            <xsl:copy-of select="." />
          </xsl:for-each>
          <xsl:for-each select="msxml:node-set(umbraco.page:SearchPDF($searchTerm))/nodes/node" >
            <xsl:copy-of select="." />
          </xsl:for-each>
        </results>
      </xsl:variable>
      <!-- resultsRaw is a result tree fragment; convert it to a node-set so it can be traversed. -->
      <xsl:variable name="results" select="msxml:node-set($resultsRaw)/results" />
      <xsl:variable name="total" select="count($results/node)" />
      <xsl:choose>
        <xsl:when test="$total = 0">
          <p id="xsltsearch_summary">No matches were found for <strong><xsl:value-of select="$searchTerm" /></strong></p>
        </xsl:when>
        <xsl:otherwise>
          <p id="xsltsearch_summary">
Your search for <strong><xsl:value-of select="$searchTerm" /></strong>
            <xsl:value-of select="umbraco.page:Pluralize($total,' match ',' matches ')" /> <strong><xsl:value-of select="$total" /></strong> <xsl:value-of select="umbraco.page:Pluralize($total,' result ',' results ')" />.
</p>
        </xsl:otherwise>
      </xsl:choose>
      <div id="xsltsearch_results">
        <!-- Scores from the two searches are sorted together as plain numbers. -->
        <xsl:for-each select="$results/node">
          <xsl:sort select="./@score" order="descending" data-type="number" />
          <div class="xsltsearch_result">
            <xsl:choose>
              <!-- A hit with a non-empty FileTextContent field is treated as a PDF (media item). -->
              <xsl:when test="./data [@alias='FileTextContent'] != ''">
                <xsl:variable name="pdf" select="umbraco.library:GetMedia(./@id,0)" />
                <p class="xsltsearch_result_title xsltsearch_pdf">
                  <a href="{$pdf/umbracoFile}" class="xsltsearch_title" target="_blank">
                    <xsl:value-of select="$pdf/@nodeName" disable-output-escaping="yes" />
                  </a>
                </p>
                <p class="xsltsearch_result_description">
                  <span class="xsltsearch_description">
                    <xsl:variable name="sample" select="umbraco.library:StripHtml(./data [@alias='FileTextContent'])" />
                    <xsl:value-of select="substring($sample,1,255)" />
                    <xsl:if test="string-length($sample) > 255">...</xsl:if>
                  </span>
                </p>
              </xsl:when>
              <!-- Everything else is treated as a content page. -->
              <xsl:otherwise>
                <xsl:variable name="page" select="umbraco.library:GetXmlNodeById(./@id)" />
                <p class="xsltsearch_result_title">
                  <a href="{umbraco.library:NiceUrl(./@id)}" class="xsltsearch_title">
                    <xsl:value-of select="$page/@nodeName" disable-output-escaping="yes" />
                  </a>
                </p>
                <p class="xsltsearch_result_description">
                  <span class="xsltsearch_description">
                    <!-- Prefer the meta description; fall back to the body content. -->
                    <xsl:variable name="sample">
                      <xsl:choose>
                        <xsl:when test="$page/metaDescription != ''">
                          <xsl:value-of select="umbraco.library:StripHtml($page/metaDescription)" />
                        </xsl:when>
                        <xsl:otherwise>
                          <xsl:value-of select="umbraco.library:StripHtml($page/bodyContent)" />
                        </xsl:otherwise>
                      </xsl:choose>
                    </xsl:variable>
                    <xsl:value-of select="substring($sample,1,255)" />
                    <xsl:if test="string-length($sample) > 255">...</xsl:if>
                  </span>
                </p>
              </xsl:otherwise>
            </xsl:choose>
          </div>
        </xsl:for-each>
      </div>
    </xsl:if>
  </div>
</xsl:template>
<msxsl:script language="C#" implements-prefix="umbraco.page">
<msxsl:assembly name="Examine" />
<msxsl:assembly name="UmbracoExamine" />
<msxsl:assembly name="System.Xml.Linq" />
<msxsl:assembly name="System.Configuration" />
<msxsl:using namespace="Examine" />
<msxsl:using namespace="Examine.SearchCriteria" />
<msxsl:using namespace="Examine.LuceneEngine.Providers" />
<msxsl:using namespace="UmbracoExamine" />
<msxsl:using namespace="UmbracoExamine.SearchCriteria" />
<msxsl:using namespace="System.Collections.Generic" />
<msxsl:using namespace="System.Xml.Linq" />
<msxsl:using namespace="System.Xml.XPath" />
<![CDATA[
/// <summary>
/// Picks the label that matches a count.
/// </summary>
/// <param name="count">The number of items being described.</param>
/// <param name="singular">Returned when <paramref name="count"/> is exactly 1.</param>
/// <param name="plural">Returned for every other count, including 0 and negatives.</param>
public static string Pluralize(int count, string singular, string plural)
{
    if (count != 1)
    {
        return plural;
    }

    return singular;
}
/// <summary>
/// Searches the "PDFSearcher" provider for any of the space-separated terms in
/// <paramref name="searchTerm"/>, matching them against the "FileTextContent" field.
/// </summary>
/// <param name="searchTerm">Raw search text; it is split on spaces into individual terms.</param>
/// <returns>
/// The hits converted by GetResultsAsXml, or null when the search text is null, empty
/// or contains nothing but spaces.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The "PDFSearcher" provider is missing or is not an UmbracoExamineSearcher.
/// </exception>
public static XPathNodeIterator SearchPDF(string searchTerm)
{
    if (string.IsNullOrEmpty(searchTerm)) return null;

    // Drop empty entries so repeated, leading or trailing spaces do not become empty query terms.
    string[] terms = searchTerm.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    if (terms.Length == 0) return null;

    var searchProvider = "PDFSearcher";
    var provider = ExamineManager.Instance.SearchProviderCollection[searchProvider] as UmbracoExamineSearcher;
    if (provider == null)
    {
        // The "as" cast yields null for a missing or differently-typed provider; fail with a clear message.
        throw new InvalidOperationException("Search provider '" + searchProvider + "' is not configured or is not an UmbracoExamineSearcher.");
    }

    var criteria = provider.CreateSearchCriteria(BooleanOperation.Or);
    string[] fields = new string[] { "FileTextContent" };
    Examine.SearchCriteria.IBooleanOperation filter = null;

    // OR together one field/term clause for every combination.
    foreach (string field in fields)
    {
        foreach (string term in terms)
        {
            filter = (filter == null)
                ? criteria.Field(field, term)
                : filter.Or().Field(field, term);
        }
    }

    var results = provider.Search(filter.Compile());
    return GetResultsAsXml(results);
}
/// <summary>
/// Searches the default search provider (content index type) for any of the
/// space-separated terms in <paramref name="searchTerm"/> across a fixed list of fields.
/// </summary>
/// <param name="searchTerm">Raw search text; it is split on spaces into individual terms.</param>
/// <returns>
/// The hits converted by GetResultsAsXml, or null when the search text is null, empty
/// or contains nothing but spaces.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The default provider is missing or is not an UmbracoExamineSearcher.
/// </exception>
public static XPathNodeIterator Search(string searchTerm)
{
    if (string.IsNullOrEmpty(searchTerm)) return null;

    // Drop empty entries so repeated, leading or trailing spaces do not become empty query terms.
    string[] terms = searchTerm.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    if (terms.Length == 0) return null;

    var searchProvider = ExamineManager.Instance.DefaultSearchProvider.Name;
    var provider = ExamineManager.Instance.SearchProviderCollection[searchProvider] as UmbracoExamineSearcher;
    if (provider == null)
    {
        // The "as" cast yields null for a missing or differently-typed provider; fail with a clear message.
        throw new InvalidOperationException("Search provider '" + searchProvider + "' is not configured or is not an UmbracoExamineSearcher.");
    }

    var criteria = provider.CreateSearchCriteria(IndexTypes.Content, BooleanOperation.Or);
    string[] fields = new string[] { "metaKeywords", "metaDescription", "pageContent", "pageTitle", "nodeName" };
    Examine.SearchCriteria.IBooleanOperation filter = null;

    // OR together one field/term clause for every combination.
    foreach (string field in fields)
    {
        foreach (string term in terms)
        {
            filter = (filter == null)
                ? criteria.Field(field, term)
                : filter.Or().Field(field, term);
        }
    }

    var results = provider.Search(filter.Compile());
    return GetResultsAsXml(results);
}
/// <summary>
/// Converts search hits into an XML document of the form
/// nodes / node[@id, @score] / data[@alias] and returns an iterator selected at "/".
/// </summary>
/// <param name="results">The hits to convert; each hit's fields become data elements.</param>
/// <returns>An iterator over the document root of the generated XML.</returns>
private static XPathNodeIterator GetResultsAsXml(ISearchResults results)
{
    XElement nodes = new XElement("nodes");
    foreach (SearchResult hit in results)
    {
        XElement hitElement = new XElement("node",
            new XAttribute("id", hit.Id),
            new XAttribute("score", hit.Score));

        // Every field is emitted as a data element: alias attribute = field name, CDATA body = field value.
        foreach (KeyValuePair<String, String> pair in hit.Fields)
        {
            hitElement.Add(new XElement("data",
                new XAttribute("alias", pair.Key),
                new XCData(pair.Value)));
        }
        nodes.Add(hitElement);
    }
    return new XDocument(nodes).CreateNavigator().Select("/");
}
]]>
</msxsl:script>
</xsl:stylesheet>
is working on a reply...