}
}
/**
 * Checks how the low- and high-frequency "minimum number should match"
 * settings of {@link CommonTermsQuery} influence the hit count and ranking
 * over a small four-document index.
 *
 * <p>Every query is built with a maximum term frequency picked at random
 * from {@code 2.0f} and {@code 0.5f}; the assertions are the same for both
 * values.
 *
 * @throws IOException if indexing, searching or closing the index fails
 */
public void testMinShouldMatch() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  String[] docs = new String[] {"this is the end of the world right",
      "is this it or maybe not",
      "this is the end of the universe as we know it",
      "there is the famous restaurant at the end of the universe",};
  // The stored "id" field is the document's position in the array above, so
  // the assertions below can identify hits by that position.
  for (int i = 0; i < docs.length; i++) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + i, Field.Store.YES));
    doc.add(newTextField("field", docs[i], Field.Store.NO));
    w.addDocument(doc);
  }
  IndexReader r = w.getReader();
  IndexSearcher s = newSearcher(r);

  // Term set shared by the first five scenarios: "is" occurs in all four
  // documents, "this" and "end" in three, "universe" in two, and "world" and
  // "right" only in document 0.
  String[] mixedTerms = {"is", "this", "end", "world", "universe", "right"};

  // NOTE(review): values below 1.0 are presumably treated as a fraction of the
  // low-frequency terms and values >= 1.0 as an absolute count - confirm
  // against the CommonTermsQuery documentation.
  {
    // low-freq min-should-match 0.5: only document 0 is expected
    CommonTermsQuery query = newMinShouldMatchQuery(Occur.SHOULD, Occur.SHOULD, mixedTerms);
    query.setLowFreqMinimumNumberShouldMatch(0.5f);
    TopDocs search = s.search(query, 10);
    assertEquals(1, search.totalHits);
    assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
  }
  {
    // low-freq min-should-match 2.0: still only document 0 is expected
    CommonTermsQuery query = newMinShouldMatchQuery(Occur.SHOULD, Occur.SHOULD, mixedTerms);
    query.setLowFreqMinimumNumberShouldMatch(2.0f);
    TopDocs search = s.search(query, 10);
    assertEquals(1, search.totalHits);
    assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
  }
  {
    // low-freq min-should-match 0.49: documents 0, 2 and 3 are expected, in that order
    CommonTermsQuery query = newMinShouldMatchQuery(Occur.SHOULD, Occur.SHOULD, mixedTerms);
    query.setLowFreqMinimumNumberShouldMatch(0.49f);
    TopDocs search = s.search(query, 10);
    assertEquals(3, search.totalHits);
    assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
    assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
    assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
  }
  {
    // low-freq min-should-match 1.0: same three hits, and document 2 must
    // score strictly higher than document 3
    CommonTermsQuery query = newMinShouldMatchQuery(Occur.SHOULD, Occur.SHOULD, mixedTerms);
    query.setLowFreqMinimumNumberShouldMatch(1.0f);
    TopDocs search = s.search(query, 10);
    assertEquals(3, search.totalHits);
    assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
    assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
    assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
    assertTrue(search.scoreDocs[1].score > search.scoreDocs[2].score);
  }
  {
    // additionally set high-freq min-should-match to 4.0: the same three
    // documents are hit, but documents 2 and 3 now tie
    CommonTermsQuery query = newMinShouldMatchQuery(Occur.SHOULD, Occur.SHOULD, mixedTerms);
    query.setLowFreqMinimumNumberShouldMatch(1.0f);
    query.setHighFreqMinimumNumberShouldMatch(4.0f);
    TopDocs search = s.search(query, 10);
    assertEquals(3, search.totalHits);
    assertEquals(search.scoreDocs[1].score, search.scoreDocs[2].score, 0.0f);
    assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
    // doc 2 and 3 only get a score from low freq terms
    // (their scores are equal, so their relative order is not asserted)
    assertEquals(
        new HashSet<String>(Arrays.asList("2", "3")),
        new HashSet<String>(Arrays.asList(
            r.document(search.scoreDocs[1].doc).get("id"),
            r.document(search.scoreDocs[2].doc).get("id"))));
  }
  {
    // only high freq terms around - check that min should match is applied
    // (all four documents are expected to match)
    CommonTermsQuery query = newMinShouldMatchQuery(Occur.SHOULD, Occur.SHOULD,
        "is", "this", "the");
    query.setLowFreqMinimumNumberShouldMatch(1.0f);
    query.setHighFreqMinimumNumberShouldMatch(2.0f);
    TopDocs search = s.search(query, 10);
    assertEquals(4, search.totalHits);
  }
  {
    // only high freq terms around, this time with Occur.MUST as the first
    // constructor argument: the expected hits are documents 0 and 2, the only
    // ones that contain all three of "is", "this" and "the"
    CommonTermsQuery query = newMinShouldMatchQuery(Occur.MUST, Occur.SHOULD,
        "is", "this", "the");
    query.setLowFreqMinimumNumberShouldMatch(1.0f);
    query.setHighFreqMinimumNumberShouldMatch(2.0f);
    TopDocs search = s.search(query, 10);
    assertEquals(2, search.totalHits);
    assertEquals(
        new HashSet<String>(Arrays.asList("0", "2")),
        new HashSet<String>(Arrays.asList(
            r.document(search.scoreDocs[0].doc).get("id"),
            r.document(search.scoreDocs[1].doc).get("id"))));
  }
  r.close();
  w.close();
  dir.close();
}

/**
 * Builds a {@link CommonTermsQuery} over the {@code "field"} field whose
 * maximum term frequency is randomly either {@code 2.0f} or {@code 0.5f}.
 *
 * @param highFreqOccur first constructor argument of the query
 * @param lowFreqOccur second constructor argument of the query
 * @param terms term texts to add, in the given order
 * @return the populated query, with no min-should-match settings applied yet
 */
private CommonTermsQuery newMinShouldMatchQuery(Occur highFreqOccur, Occur lowFreqOccur,
    String... terms) {
  // Exactly one random draw per query, taken at construction time.
  CommonTermsQuery query = new CommonTermsQuery(highFreqOccur, lowFreqOccur,
      random().nextBoolean() ? 2.0f : 0.5f);
  for (String text : terms) {
    query.add(new Term("field", text));
  }
  return query;
}