@Override
public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
XContentParser parser = parseContext.parser();
MoreLikeThisQuery mltQuery = new MoreLikeThisQuery();
mltQuery.setSimilarity(parseContext.searchSimilarity());
Analyzer analyzer = null;
List<String> moreLikeFields = null;
boolean failOnUnsupportedField = true;
String queryName = null;
boolean include = false;
XContentParser.Token token;
String currentFieldName = null;
List<String> likeTexts = new ArrayList<>();
MultiTermVectorsRequest items = new MultiTermVectorsRequest();
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token.isValue()) {
if (Fields.LIKE_TEXT.match(currentFieldName, parseContext.parseFlags())) {
likeTexts.add(parser.text());
} else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
parseLikeField(parser, likeTexts, items);
} else if (Fields.MIN_TERM_FREQ.match(currentFieldName, parseContext.parseFlags())) {
mltQuery.setMinTermFrequency(parser.intValue());
} else if (Fields.MAX_QUERY_TERMS.match(currentFieldName, parseContext.parseFlags())) {
mltQuery.setMaxQueryTerms(parser.intValue());
} else if (Fields.MIN_DOC_FREQ.match(currentFieldName, parseContext.parseFlags())) {
mltQuery.setMinDocFreq(parser.intValue());
} else if (Fields.MAX_DOC_FREQ.match(currentFieldName, parseContext.parseFlags())) {
mltQuery.setMaxDocFreq(parser.intValue());
} else if (Fields.MIN_WORD_LENGTH.match(currentFieldName, parseContext.parseFlags())) {
mltQuery.setMinWordLen(parser.intValue());
} else if (Fields.MAX_WORD_LENGTH.match(currentFieldName, parseContext.parseFlags())) {
mltQuery.setMaxWordLen(parser.intValue());
} else if (Fields.BOOST_TERMS.match(currentFieldName, parseContext.parseFlags())) {
float boostFactor = parser.floatValue();
if (boostFactor != 0) {
mltQuery.setBoostTerms(true);
mltQuery.setBoostTermsFactor(boostFactor);
}
} else if (Fields.MINIMUM_SHOULD_MATCH.match(currentFieldName, parseContext.parseFlags())) {
mltQuery.setMinimumShouldMatch(parser.text());
} else if (Fields.PERCENT_TERMS_TO_MATCH.match(currentFieldName, parseContext.parseFlags())) {
mltQuery.setMinimumShouldMatch(Math.round(parser.floatValue() * 100) + "%");
} else if ("analyzer".equals(currentFieldName)) {
analyzer = parseContext.analysisService().analyzer(parser.text());
} else if ("boost".equals(currentFieldName)) {
mltQuery.setBoost(parser.floatValue());
} else if (Fields.FAIL_ON_UNSUPPORTED_FIELD.match(currentFieldName, parseContext.parseFlags())) {
failOnUnsupportedField = parser.booleanValue();
} else if ("_name".equals(currentFieldName)) {
queryName = parser.text();
} else if (Fields.INCLUDE.match(currentFieldName, parseContext.parseFlags())) {
include = parser.booleanValue();
} else {
throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]");
}
} else if (token == XContentParser.Token.START_ARRAY) {
if (Fields.STOP_WORDS.match(currentFieldName, parseContext.parseFlags())) {
Set<String> stopWords = Sets.newHashSet();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
stopWords.add(parser.text());
}
mltQuery.setStopWords(stopWords);
} else if ("fields".equals(currentFieldName)) {
moreLikeFields = Lists.newLinkedList();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
moreLikeFields.add(parseContext.indexName(parser.text()));
}
} else if (Fields.DOCUMENT_IDS.match(currentFieldName, parseContext.parseFlags())) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (!token.isValue()) {
throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
}
items.add(newTermVectorRequest().id(parser.text()));
}
} else if (Fields.DOCUMENTS.match(currentFieldName, parseContext.parseFlags())) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token != XContentParser.Token.START_OBJECT) {
throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
}
items.add(parseDocument(parser));
}
} else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
parseLikeField(parser, likeTexts, items);
}
} else {
throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]");
}
} else if (token == XContentParser.Token.START_OBJECT) {
if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
parseLikeField(parser, likeTexts, items);
} else {
throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]");
}
}
}
if (likeTexts.isEmpty() && items.isEmpty()) {
throw new QueryParsingException(parseContext.index(), "more_like_this requires at least 'like_text' or 'ids/docs' to be specified");
}
if (moreLikeFields != null && moreLikeFields.isEmpty()) {
throw new QueryParsingException(parseContext.index(), "more_like_this requires 'fields' to be non-empty");
}
// set analyzer
if (analyzer == null) {
analyzer = parseContext.mapperService().searchAnalyzer();
}
mltQuery.setAnalyzer(analyzer);
// set like text fields
boolean useDefaultField = (moreLikeFields == null);
if (useDefaultField) {
moreLikeFields = Lists.newArrayList(parseContext.defaultField());
}
// possibly remove unsupported fields
removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField);
if (moreLikeFields.isEmpty()) {
return null;
}
mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY));
// support for named query
if (queryName != null) {
parseContext.addNamedQuery(queryName, mltQuery);
}
// handle like texts
if (!likeTexts.isEmpty()) {
mltQuery.setLikeText(likeTexts);
}
// handle items
if (!items.isEmpty()) {
// set default index, type and fields if not specified
for (TermVectorRequest item : items) {
if (item.index() == null) {
item.index(parseContext.index().name());
}
if (item.type() == null) {
if (parseContext.queryTypes().size() > 1) {
throw new QueryParsingException(parseContext.index(),
"ambiguous type for item with id: " + item.id() + " and index: " + item.index());
} else {
item.type(parseContext.queryTypes().iterator().next());
}
}
// default fields if not present but don't override for artificial docs
if (item.selectedFields() == null && item.doc() == null) {
if (useDefaultField) {
item.selectedFields("*");
} else {
item.selectedFields(moreLikeFields.toArray(new String[moreLikeFields.size()]));
}
}
}
// fetching the items with multi-termvectors API
BooleanQuery boolQuery = new BooleanQuery();
org.apache.lucene.index.Fields[] likeFields = fetchService.fetch(items);
mltQuery.setLikeText(likeFields);
boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD);
// exclude the items from the search
if (!include) {
TermsFilter filter = new TermsFilter(UidFieldMapper.NAME, Uid.createUids(items.getRequests()));
ConstantScoreQuery query = new ConstantScoreQuery(filter);