/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.action.termvector;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.DocumentRequest;
import org.elasticsearch.action.ValidateActions;
import org.elasticsearch.action.get.MultiGetRequest;
import org.elasticsearch.action.support.single.shard.SingleShardOperationRequest;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
/**
* Request returning the term vector (doc frequency, positions, offsets) for a
* document.
* <p/>
* Note, the {@link #index()}, {@link #type(String)} and {@link #id(String)} are
* required.
*/
public class TermVectorRequest extends SingleShardOperationRequest<TermVectorRequest> implements DocumentRequest<TermVectorRequest> {
// Type of the document the term vector is requested for.
private String type;
// Id of the document the term vector is requested for; doc(BytesReference, boolean)
// can assign a generated one for artificial documents.
private String id;
// Artificial document from which term vectors are requested, if any.
private BytesReference doc;
// Routing value of this request; serialized as an optional string.
private String routing;
// Shard preference of this request; serialized as an optional string.
protected String preference;
// Counter that supplies the ids generated for artificial documents.
private static final AtomicInteger randomInt = new AtomicInteger(0);
// TODO: change to String[]
// Fields to return term vectors for; null means all fields.
private Set<String> selectedFields;
// Null is reported as true by realtime().
Boolean realtime;
// Analyzer overrides keyed by field name; null when none are set.
private Map<String, String> perFieldAnalyzer;
// Flags enabled by default: positions, offsets, payloads and field statistics.
private EnumSet<Flag> flagsEnum = EnumSet.of(Flag.Positions, Flag.Offsets, Flag.Payloads,
Flag.FieldStatistics);
/**
 * Creates an empty request. The type and either an id or an artificial
 * document still have to be set for {@link #validate()} to pass.
 */
public TermVectorRequest() {
    super();
}

/**
 * Constructs a new term vector request for the document identified by the
 * given type and id, to be fetched from the provided index.
 */
public TermVectorRequest(String index, String type, String id) {
    super(index);
    this.type = type;
    this.id = id;
}
/**
 * Copy constructor: creates a new term vector request with the same index,
 * type, id, artificial document, flags, preference, routing, selected
 * fields, per-field analyzers and realtime setting as the given request.
 * <p/>
 * The flags, the selected fields and the per-field analyzers are copied so
 * that later changes to one request do not leak into the other. The
 * artificial document reference is shared, not duplicated.
 *
 * @param other the request to copy from
 */
public TermVectorRequest(TermVectorRequest other) {
    super(other.index());
    this.id = other.id();
    this.type = other.type();
    // previously dropped: an artificial document must survive the copy
    this.doc = other.doc;
    this.flagsEnum = other.getFlags().clone();
    this.preference = other.preference();
    this.routing = other.routing();
    if (other.selectedFields != null) {
        this.selectedFields = new HashSet<>(other.selectedFields);
    }
    // previously dropped: analyzer overrides must survive the copy as well
    if (other.perFieldAnalyzer != null) {
        this.perFieldAnalyzer = new HashMap<>(other.perFieldAnalyzer);
    }
    this.realtime = other.realtime();
}
/**
 * Constructs a term vector request from a multi-get item, taking over its
 * index, id, type, fields and routing.
 */
public TermVectorRequest(MultiGetRequest.Item item) {
    super(item.index());
    id = item.id();
    type = item.type();
    // go through the setters so that an empty field list is normalized
    selectedFields(item.fields());
    routing(item.routing());
}
/**
 * Returns the set of flags currently enabled on this request. The returned
 * set is the one held by the request, not a copy.
 */
public EnumSet<Flag> getFlags() {
    return this.flagsEnum;
}
/**
 * Sets the type of the document to get the term vector for.
 *
 * @return this request, for chaining
 */
public TermVectorRequest type(String type) {
    this.type = type;
    return this;
}

/**
 * @return the type of the document to get the term vector for
 */
public String type() {
    return this.type;
}
/**
 * @return the id of the document the term vector is requested for
 */
public String id() {
    return this.id;
}

/**
 * Sets the id of the document the term vector is requested for.
 *
 * @return this request, for chaining
 */
public TermVectorRequest id(String id) {
    this.id = id;
    return this;
}
/**
 * @return the artificial document term vectors are requested for, or
 *         <code>null</code> if none was set
 */
public BytesReference doc() {
    return this.doc;
}

/**
 * Sets an artificial document, given as a content builder, from which term
 * vectors are requested. An id is generated for the document.
 */
public TermVectorRequest doc(XContentBuilder documentBuilder) {
    final BytesReference bytes = documentBuilder.bytes();
    return doc(bytes, true);
}

/**
 * Sets an artificial document from which term vectors are requested.
 *
 * @param doc              the document
 * @param generateRandomId whether to assign a generated id to this request
 * @return this request, for chaining
 */
public TermVectorRequest doc(BytesReference doc, boolean generateRandomId) {
    if (generateRandomId) {
        // the artificial document gets an id of its own, for routing
        final int generated = randomInt.getAndIncrement();
        this.id(String.valueOf(generated));
    }
    this.doc = doc;
    return this;
}
/**
 * @return the routing value of this request
 */
public String routing() {
    return this.routing;
}

/**
 * Sets the routing value of this request.
 *
 * @return this request, for chaining
 */
public TermVectorRequest routing(String routing) {
    this.routing = routing;
    return this;
}
/**
 * Sets the parent id of this document. The parent id is only used as the
 * routing value, and only if no routing has been set yet.
 *
 * @return this request, for chaining
 */
public TermVectorRequest parent(String parent) {
    if (routing != null) {
        // an explicit routing always wins over the parent id
        return this;
    }
    routing = parent;
    return this;
}
/**
 * @return the preference this request is executed with
 */
public String preference() {
    return preference;
}

/**
 * Sets the preference to execute the request with. Defaults to randomizing
 * across shards. Can be set to <tt>_local</tt> to prefer local shards,
 * <tt>_primary</tt> to execute only on primary shards, or a custom value,
 * which guarantees that the same order will be used across different
 * requests.
 *
 * @return this request, for chaining
 */
public TermVectorRequest preference(String preference) {
    this.preference = preference;
    return this;
}
/**
 * Chooses whether the start and stop offsets of each term are returned
 * (if they were stored) or skipped.
 *
 * @return this request, for chaining
 */
public TermVectorRequest offsets(boolean offsets) {
    this.setFlag(Flag.Offsets, offsets);
    return this;
}

/**
 * @return <code>true</code> if term offsets should be returned, otherwise
 *         <code>false</code>
 */
public boolean offsets() {
    return this.flagsEnum.contains(Flag.Offsets);
}
/**
 * Chooses whether the positions of each term are returned (if stored) or
 * skipped.
 *
 * @return this request, for chaining
 */
public TermVectorRequest positions(boolean positions) {
    this.setFlag(Flag.Positions, positions);
    return this;
}

/**
 * @return <code>true</code> if term positions should be returned, otherwise
 *         <code>false</code>
 */
public boolean positions() {
    return this.flagsEnum.contains(Flag.Positions);
}
/**
 * @return <code>true</code> if term payloads should be returned, otherwise
 *         <code>false</code>
 */
public boolean payloads() {
    return this.flagsEnum.contains(Flag.Payloads);
}

/**
 * Chooses whether the payloads of each term are returned or skipped.
 *
 * @return this request, for chaining
 */
public TermVectorRequest payloads(boolean payloads) {
    this.setFlag(Flag.Payloads, payloads);
    return this;
}
/**
 * @return <code>true</code> if term statistics should be returned,
 *         otherwise <code>false</code>
 */
public boolean termStatistics() {
    return this.flagsEnum.contains(Flag.TermStatistics);
}

/**
 * Chooses whether the term statistics for each term in the shard are
 * returned or skipped.
 *
 * @return this request, for chaining
 */
public TermVectorRequest termStatistics(boolean termStatistics) {
    this.setFlag(Flag.TermStatistics, termStatistics);
    return this;
}
/**
 * @return <code>true</code> if field statistics should be returned,
 *         otherwise <code>false</code>
 */
public boolean fieldStatistics() {
    return this.flagsEnum.contains(Flag.FieldStatistics);
}

/**
 * Chooses whether the field statistics are returned or skipped.
 *
 * @return this request, for chaining
 */
public TermVectorRequest fieldStatistics(boolean fieldStatistics) {
    this.setFlag(Flag.FieldStatistics, fieldStatistics);
    return this;
}
/**
 * @return <code>true</code> if distributed frequencies should be returned,
 *         otherwise <code>false</code>
 */
public boolean dfs() {
    return this.flagsEnum.contains(Flag.Dfs);
}

/**
 * Chooses whether distributed frequencies are used instead of shard
 * statistics.
 *
 * @return this request, for chaining
 */
public TermVectorRequest dfs(boolean dfs) {
    this.setFlag(Flag.Dfs, dfs);
    return this;
}
/**
 * Returns the fields term vectors are restricted to. A <code>null</code>
 * result means term vectors are returned for all fields.
 */
public Set<String> selectedFields() {
    return this.selectedFields;
}

/**
 * Restricts the term vectors to the given fields. Passing <code>null</code>
 * or no fields at all selects all fields again.
 *
 * @return this request, for chaining
 */
public TermVectorRequest selectedFields(String... fields) {
    if (fields == null || fields.length == 0) {
        selectedFields = null;
    } else {
        selectedFields = Sets.newHashSet(fields);
    }
    return this;
}
/**
 * Returns whether term vectors should be generated real-time. An unset
 * value counts as <code>true</code>.
 */
public boolean realtime() {
    return realtime == null || realtime;
}

/**
 * Chooses whether term vectors are generated real-time.
 *
 * @return this request, for chaining
 */
public TermVectorRequest realtime(Boolean realtime) {
    this.realtime = realtime;
    return this;
}
/**
 * Returns the analyzer overrides keyed by field name, or <code>null</code>
 * if there are none.
 */
public Map<String, String> perFieldAnalyzer() {
    return this.perFieldAnalyzer;
}

/**
 * Overrides the analyzer used at each field when generating term vectors.
 * The given map is copied; a <code>null</code> or empty map clears the
 * overrides.
 *
 * @return this request, for chaining
 */
public TermVectorRequest perFieldAnalyzer(Map<String, String> perFieldAnalyzer) {
    if (perFieldAnalyzer == null || perFieldAnalyzer.isEmpty()) {
        this.perFieldAnalyzer = null;
    } else {
        this.perFieldAnalyzer = Maps.newHashMap(perFieldAnalyzer);
    }
    return this;
}
/**
 * Switches a single flag on or off.
 *
 * @param flag the flag to change
 * @param set  <code>true</code> to enable the flag, <code>false</code> to
 *             disable it
 */
private void setFlag(Flag flag, boolean set) {
    if (set) {
        // adding an element that is already present leaves the set unchanged
        flagsEnum.add(flag);
        return;
    }
    flagsEnum.remove(flag);
    assert !flagsEnum.contains(flag);
}
/**
 * Validates this request on top of the parent validation: the type must be
 * set, and so must either an id or an artificial document.
 *
 * @return the accumulated validation errors, or whatever the parent
 *         validation returned if nothing was added here
 */
@Override
public ActionRequestValidationException validate() {
    ActionRequestValidationException errors = super.validate();
    if (type == null) {
        errors = ValidateActions.addValidationError("type is missing", errors);
    }
    final boolean hasTarget = id != null || doc != null;
    if (!hasTarget) {
        errors = ValidateActions.addValidationError("id or doc is missing", errors);
    }
    return errors;
}
/**
 * Creates a new request and populates it from the given stream.
 *
 * @param in the stream to read from
 * @return the deserialized request
 * @throws IOException if reading from the stream fails
 */
public static TermVectorRequest readTermVectorRequest(StreamInput in) throws IOException {
    final TermVectorRequest request = new TermVectorRequest();
    request.readFrom(in);
    return request;
}
/**
 * Reads this request from the given stream, mirroring
 * {@link #writeTo(StreamOutput)}. Which parts are read depends on the
 * version of the stream: the artificial document is only present from
 * 1.4.0.Beta1 on, the per-field analyzers and the realtime setting from
 * 1.5.0 on.
 *
 * @param in the stream to read from
 * @throws IOException if reading from the stream fails
 */
@Override
public void readFrom(StreamInput in) throws IOException {
    super.readFrom(in);
    if (in.getVersion().before(Version.V_1_4_0_Beta1)) {
        //term vector used to read & write the index twice, here and in the parent class
        in.readString();
    }
    type = in.readString();
    id = in.readString();
    if (in.getVersion().onOrAfter(Version.V_1_4_0_Beta1)) {
        if (in.readBoolean()) {
            doc = in.readBytesReference();
        }
    }
    routing = in.readOptionalString();
    preference = in.readOptionalString();
    // the flags travel as a bit set, one bit per Flag ordinal
    long flags = in.readVLong();
    flagsEnum.clear();
    for (Flag flag : Flag.values()) {
        // shift a long, not an int: an int shift would wrap for ordinals >= 31
        if ((flags & (1L << flag.ordinal())) != 0) {
            flagsEnum.add(flag);
        }
    }
    int numSelectedFields = in.readVInt();
    if (numSelectedFields > 0) {
        selectedFields = new HashSet<>();
        for (int i = 0; i < numSelectedFields; i++) {
            selectedFields.add(in.readString());
        }
    }
    if (in.getVersion().onOrAfter(Version.V_1_5_0)) {
        if (in.readBoolean()) {
            perFieldAnalyzer = readPerFieldAnalyzer(in.readMap());
        }
        this.realtime = in.readBoolean();
    }
}
/**
 * Writes this request to the given stream, mirroring
 * {@link #readFrom(StreamInput)}. Which parts are written depends on the
 * version of the stream: the artificial document is only written from
 * 1.4.0.Beta1 on, the per-field analyzers and the realtime setting from
 * 1.5.0 on.
 *
 * @param out the stream to write to
 * @throws IOException if writing to the stream fails
 */
@Override
public void writeTo(StreamOutput out) throws IOException {
    super.writeTo(out);
    if (out.getVersion().before(Version.V_1_4_0_Beta1)) {
        //term vector used to read & write the index twice, here and in the parent class
        out.writeString(index);
    }
    out.writeString(type);
    out.writeString(id);
    if (out.getVersion().onOrAfter(Version.V_1_4_0_Beta1)) {
        out.writeBoolean(doc != null);
        if (doc != null) {
            out.writeBytesReference(doc);
        }
    }
    out.writeOptionalString(routing);
    out.writeOptionalString(preference);
    // encode the flags as a bit set, one bit per Flag ordinal
    long longFlags = 0;
    for (Flag flag : flagsEnum) {
        // shift a long, not an int: an int shift would wrap for ordinals >= 31
        longFlags |= (1L << flag.ordinal());
    }
    out.writeVLong(longFlags);
    if (selectedFields != null) {
        out.writeVInt(selectedFields.size());
        for (String selectedField : selectedFields) {
            out.writeString(selectedField);
        }
    } else {
        // no selection is written as an empty list
        out.writeVInt(0);
    }
    if (out.getVersion().onOrAfter(Version.V_1_5_0)) {
        out.writeBoolean(perFieldAnalyzer != null);
        if (perFieldAnalyzer != null) {
            out.writeGenericValue(perFieldAnalyzer);
        }
        out.writeBoolean(realtime());
    }
}
/**
 * The pieces of information a term vector request can ask for.
 */
public static enum Flag {
    // The ordinal of each flag is used for encoding the request, so the
    // order of these flags must not change! Only append to the end!
    Positions,
    Offsets,
    Payloads,
    FieldStatistics,
    TermStatistics,
    Dfs
}
/**
 * Populates a request object (pre-populated with defaults) from the content
 * of a parser. Tokens are consumed until the end of the current object.
 *
 * @param termVectorRequest the request to populate
 * @param parser            the parser to read the parameters from
 * @throws IOException                 if reading from the parser fails
 * @throws ElasticsearchParseException if a parameter is unknown, if
 *                                     <code>fields</code> is not an array, or
 *                                     if both an id and a doc are given
 */
public static void parseRequest(TermVectorRequest termVectorRequest, XContentParser parser) throws IOException {
    final List<String> selected = new ArrayList<>();
    String fieldName = null;
    XContentParser.Token current;
    while ((current = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
        if (current == XContentParser.Token.FIELD_NAME) {
            fieldName = parser.currentName();
            continue;
        }
        if (fieldName == null) {
            // nothing to attach this token to yet
            continue;
        }
        switch (fieldName) {
            case "fields":
                if (current != XContentParser.Token.START_ARRAY) {
                    throw new ElasticsearchParseException(
                            "The parameter fields must be given as an array! Use syntax : \"fields\" : [\"field1\", \"field2\",...]");
                }
                while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
                    selected.add(parser.text());
                }
                break;
            case "offsets":
                termVectorRequest.offsets(parser.booleanValue());
                break;
            case "positions":
                termVectorRequest.positions(parser.booleanValue());
                break;
            case "payloads":
                termVectorRequest.payloads(parser.booleanValue());
                break;
            case "term_statistics":
            case "termStatistics":
                termVectorRequest.termStatistics(parser.booleanValue());
                break;
            case "field_statistics":
            case "fieldStatistics":
                termVectorRequest.fieldStatistics(parser.booleanValue());
                break;
            case "dfs":
                termVectorRequest.dfs(parser.booleanValue());
                break;
            case "per_field_analyzer":
            case "perFieldAnalyzer":
                termVectorRequest.perFieldAnalyzer(readPerFieldAnalyzer(parser.map()));
                break;
            case "_index":
                // the following is important for multi request parsing.
                termVectorRequest.index = parser.text();
                break;
            case "_type":
                termVectorRequest.type = parser.text();
                break;
            case "_id":
                if (termVectorRequest.doc != null) {
                    throw new ElasticsearchParseException("Either \"id\" or \"doc\" can be specified, but not both!");
                }
                termVectorRequest.id = parser.text();
                break;
            case "doc":
                if (termVectorRequest.id != null) {
                    throw new ElasticsearchParseException("Either \"id\" or \"doc\" can be specified, but not both!");
                }
                termVectorRequest.doc(jsonBuilder().copyCurrentStructure(parser));
                break;
            case "_routing":
            case "routing":
                termVectorRequest.routing = parser.text();
                break;
            default:
                throw new ElasticsearchParseException("The parameter " + fieldName
                        + " is not valid for term vector request!");
        }
    }
    if (!selected.isEmpty()) {
        termVectorRequest.selectedFields(selected.toArray(new String[selected.size()]));
    }
}
/**
 * Converts a generic map into a map from field name to analyzer name.
 *
 * @param map the map to convert; every value must be a String
 * @return a new map holding the same entries, typed as strings
 * @throws ElasticsearchException if a value is not a String, including the
 *                                case of a <code>null</code> value
 */
private static Map<String, String> readPerFieldAnalyzer(Map<String, Object> map) {
    Map<String, String> mapStrStr = new HashMap<>();
    for (Map.Entry<String, Object> e : map.entrySet()) {
        final Object analyzer = e.getValue();
        if (analyzer instanceof String) {
            mapStrStr.put(e.getKey(), (String) analyzer);
        } else {
            // a null value has no class to report; describe it explicitly
            // rather than failing with a NullPointerException
            final Object actual = analyzer == null ? "null value" : analyzer.getClass();
            throw new ElasticsearchException(
                    "The analyzer at " + e.getKey() + " should be of type String, but got a " + actual + "!");
        }
    }
    return mapStrStr;
}
}