// Use only part of the body and store the unused remainder back in docData
// (the whole body is used if size<=0 or size is at least the body length).
// Reset the docData properties (set to null) so they are not added more than once.
private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
int docid = incrNumDocsCreated();
Document doc = new Document();
doc.add(new Field(ID_FIELD, docid+"", storeVal, indexVal, termVecVal));
if (docData.getName()!=null) {
String name = (cnt<0 ? docData.getName() : docData.getName()+"_"+cnt);
///////////////////
doc.add(new Field(NAME_FIELD, name, Field.Store.YES,Field.Index.UN_TOKENIZED, termVecVal));
}
if (docData.getDate()!=null) {
String dateStr = DateTools.dateToString(docData.getDate(), DateTools.Resolution.SECOND);
doc.add(new Field(DATE_FIELD, dateStr, storeVal, indexVal, termVecVal));
}
if (docData.getTitle()!=null) {
doc.add(new Field(TITLE_FIELD, docData.getTitle(), storeVal, indexVal, termVecVal));
}
if (docData.getBody()!=null && docData.getBody().length()>0) {
String bdy;
if (size<=0 || size>=docData.getBody().length()) {
bdy = docData.getBody(); // use all
docData.setBody(""); // nothing left
} else {
// attempt not to break words - if whitespace found within next 20 chars...
for (int n=size-1; n<size+20 && n<docData.getBody().length(); n++) {
if (Character.isWhitespace(docData.getBody().charAt(n))) {
size = n;
break;
}
}
bdy = docData.getBody().substring(0,size); // use part
docData.setBody(docData.getBody().substring(size)); // some left
}
doc.add(new Field(BODY_FIELD, bdy, storeVal, indexVal, Field.TermVector.YES));
if (storeBytes == true) {
doc.add(new Field(BYTES_FIELD, bdy.getBytes("UTF-8"), Field.Store.YES));
}
}
if (docData.getProps()!=null) {
for (Iterator it = docData.getProps().keySet().iterator(); it.hasNext(); ) {
String key = (String) it.next();
String val = (String) docData.getProps().get(key);
doc.add(new Field(key, val, storeVal, indexVal, termVecVal));
}
docData.setProps(null);
}
//System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n==========");
return doc;