/**
* Copyright (C) 2010 EdgyTech LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.edgytech.umongo;
import com.mongodb.*;
import com.mongodb.util.JSON;
import java.io.*;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bson.BSONDecoder;
import org.bson.BasicBSONDecoder;
/**
*
* @author antoine
*/
public class DocumentDeserializer {
public enum Format {
JSON,
JSON_ARRAY,
JSON_SINGLE_DOC,
CSV,
BSON
}
Format format;
boolean first = true;
String fields;
String[] filter;
File file;
BufferedReader br;
InputStream is;
DBCallback callback;
BSONDecoder decoder;
Iterator iterator;
BasicDBObject template;
String delimiter = ",";
String quote = "\"";
Pattern pattern;
public DocumentDeserializer(Format format, String fields) {
this.format = format;
this.fields = fields;
if (fields != null) {
// this is from the form, always comma separated
filter = fields.split(",");
for (int i = 0; i < filter.length; ++i) {
filter[i] = filter[i].trim();
}
}
}
public Format getFormat() {
return format;
}
public String getFields() {
return fields;
}
void setTemplate(BasicDBObject template) {
this.template = template;
}
public void setInputStream(InputStream is) {
this.is = is;
}
public InputStream getInputStream() {
return is;
}
public void setFile(File file) {
this.file = file;
}
public File getFile() {
return file;
}
public class DocumentIterator implements java.util.Iterator<DBObject> {
public boolean hasNext() {
// try {
// if (fis != null)
// return fis.available() > 0;
// } catch (IOException ex) {
// Logger.getLogger(Serializer.class.getName()).log(Level.SEVERE, null, ex);
// }
// return false;
return true;
}
public DBObject next() {
try {
return readObject();
} catch (IOException ex) {
Logger.getLogger(DocumentDeserializer.class.getName()).log(Level.SEVERE, null, ex);
}
return null;
}
public void remove() {
throw new UnsupportedOperationException("Not supported yet.");
}
}
public DocumentIterator iterator() {
return new DocumentIterator();
}
private List<String> splitByCommasNotInQuotes(String s) {
List<String> list = new ArrayList<String>();
if (pattern == null) {
pattern = Pattern.compile("[" + quote + delimiter + "]");
}
Matcher m = pattern.matcher(s);
int pos = 0;
boolean quoteMode = false;
boolean wasQuoted = false;
while (m.find()) {
String sep = m.group();
if (quote.equals(sep)) {
int qpos = m.start();
if (!quoteMode) {
// only turn on quote mode if previous char was the delimiter
if (qpos == pos) {
quoteMode = true;
wasQuoted = true;
}
} else {
quoteMode = false;
}
} else if (!quoteMode && delimiter.equals(sep)) {
int toPos = m.start();
String token = s.substring(pos, toPos);
if (wasQuoted) {
token = token.substring(quote.length(), token.length() - quote.length());
}
list.add(token);
pos = m.end();
wasQuoted = false;
}
}
// previous loop always finishes on last limiter, need to add last token
if (pos <= s.length()) {
list.add(s.substring(pos));
}
return list;
}
public DBObject readObject() throws IOException {
if (first) {
if (format != Format.BSON) {
if (is == null) {
FileReader fr = new FileReader(file);
br = new BufferedReader(fr);
} else {
br = new BufferedReader(new InputStreamReader(is));
}
if (format == Format.CSV) {
fields = br.readLine();
if (fields != null) {
filter = fields.split(delimiter);
// field names are never quoted
for (int i = 0; i < filter.length; ++i) {
filter[i] = filter[i].trim();
}
}
}
} else {
if (is == null) {
is = new FileInputStream(file);
}
callback = new DefaultDBCallback(null);
decoder = new BasicBSONDecoder();
}
if (format == Format.JSON_ARRAY) {
String line = br.readLine();
BasicDBList list = (BasicDBList) JSON.parse(line);
iterator = list.iterator();
}
first = false;
}
if (format == Format.JSON_ARRAY) {
if (iterator == null || !iterator.hasNext()) {
return null;
}
return (DBObject) iterator.next();
}
DBObject obj = null;
if (format != Format.BSON) {
String line = br.readLine();
if (line == null) {
return null;
}
if (format == Format.JSON_SINGLE_DOC) {
// keep reading all lines
String line2 = null;
while ((line2 = br.readLine()) != null) {
line += line2;
}
}
if (format == Format.CSV) {
List<String> values = splitByCommasNotInQuotes(line);
if (template == null) {
obj = new BasicDBObject();
// set each field defined
for (int i = 0; i < filter.length; ++i) {
String val = values.get(i);
// string values are always quoted
obj.put(filter[i], JSON.parse(val));
}
} else {
obj = (BasicDBObject) template.copy();
fillInTemplate(obj, values);
}
} else {
obj = (DBObject) JSON.parse(line);
}
} else {
// BSON is binary
callback.reset();
try {
decoder.decode(is, callback);
} catch (IOException e) {
// most likely EOF
return null;
}
obj = (DBObject) callback.get();
// // read length
// byte[] buf = new byte[4096];
// int n = fis.read(buf, 0, 4);
// if (n <= 0) {
// return null;
// }
// int len = Bits.readInt(buf);
//
// ByteArrayOutputStream baos = new ByteArrayOutputStream();
// baos.write(buf, 0, 4);
// int toread = len;
// while (toread > 0) {
// n = fis.read(buf, 0, Math.min(toread, buf.length));
// if (n <= 0) {
// break;
// }
// baos.write(buf, 0, n);
// toread -= n;
// }
// if (baos.size() != len)
// throw new IOException("Lenght of read object " + baos.size() + " does not match expected size " + len);
// obj = new BasicDBObject((BasicBSONObject) BSON.decode(baos.toByteArray()));
}
return obj;
}
public void close() throws IOException {
if (br != null) {
br.close();
}
if (is != null) {
is.close();
}
}
private void fillInTemplate(DBObject obj, List<String> values) {
for (String field : obj.keySet()) {
Object val = obj.get(field);
if (val instanceof BasicDBObject) {
fillInTemplate((BasicDBObject) val, values);
} else if (val instanceof BasicDBList) {
fillInTemplate((BasicDBList) val, values);
} else if (val instanceof String) {
String str = (String) val;
if (str.startsWith("$")) {
str = str.substring(1);
int slash = str.indexOf("/");
String ref = str;
String type = null;
if (slash > 0) {
ref = str.substring(0, slash);
type = str.substring(slash + 1);
}
// find field index
int index = 0;
while (index < filter.length && !filter[index].equals(ref)) {
++index;
}
if (index >= filter.length) {
continue;
}
String value = values.get(index);
try {
if (type == null || "JSON".equals(type)) {
// this is typically used for quoted Strings
obj.put(field, JSON.parse(value));
} else if ("String".equals(type)) {
obj.put(field, value);
} else if ("Date".equals(type)) {
Long time = Long.valueOf(value);
obj.put(field, new Date(time));
} else if ("Boolean".equals(type)) {
obj.put(field, Boolean.valueOf(value));
} else if ("Integer".equals(type)) {
obj.put(field, Integer.valueOf(value));
} else if ("Long".equals(type)) {
obj.put(field, Long.valueOf(value));
} else if ("Double".equals(type)) {
obj.put(field, Double.valueOf(value));
}
} catch (Exception ex) {
Logger.getLogger(DocumentDeserializer.class.getName()).log(Level.WARNING, null, ex);
}
} else {
// this is a static value
obj.put(field, val);
}
} else {
// this is a static value
obj.put(field, val);
}
}
}
void setDelimiter(String delimiter) {
if (!delimiter.trim().isEmpty()) {
this.delimiter = delimiter.substring(0, 1);
}
}
void setQuote(String quote) {
if (!quote.trim().isEmpty()) {
this.quote = quote.substring(0, 1);
}
}
}