/*--------------------------------------------------------------------------
* Copyright 2011 Taro L. Saito
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*--------------------------------------------------------------------------*/
//--------------------------------------
// XerialJ
//
// Find.java
// Since: 2011/02/09 15:30:09
//
// $URL$
// $Author$
//--------------------------------------
package org.xerial.silk.weaver.cui;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.ArrayList;
import java.util.regex.Pattern;
import org.xerial.core.XerialErrorCode;
import org.xerial.core.XerialException;
import org.xerial.json.JSONTreeParser;
import org.xerial.lens.relation.Node;
import org.xerial.lens.relation.Tuple;
import org.xerial.lens.relation.TupleVisitor;
import org.xerial.lens.relation.lang.RelationExpr;
import org.xerial.lens.relation.query.QuerySet;
import org.xerial.lens.relation.query.RelationExtractor;
import org.xerial.lens.relation.query.RelationHandler;
import org.xerial.lens.relation.schema.Schema;
import org.xerial.silk.SilkEnv;
import org.xerial.silk.SilkParser;
import org.xerial.util.FileType;
import org.xerial.util.StringUtil;
import org.xerial.util.io.StandardInputStream;
import org.xerial.util.log.Logger;
import org.xerial.util.opt.Argument;
import org.xerial.util.opt.Command;
import org.xerial.util.opt.Option;
import org.xerial.util.text.TabAsTreeParser;
import org.xerial.util.tree.TreeParser;
import org.xerial.xml.XMLTreeParser;
/**
 * The "find" command: evaluates a relation query against tree-structured
 * input (Silk, XML, JSON, tab-separated or comma-separated text) and prints
 * every extracted relation to standard output, one relation per line.
 */
public class Find implements Command
{
    private static final Logger _logger = Logger.getLogger(Find.class);

    /**
     * Matches a resource name that begins with a scheme made of ASCII letters,
     * digits or '+', followed by "://".
     */
    private static final Pattern PROTOCOL_PREFIX = Pattern.compile("^[A-Za-z0-9+]+://.*");

    @Override
    public String name() {
        return "find";
    }

    @Override
    public String getOneLineDescription() {
        return "find relation from Silk data";
    }

    @Override
    public Object getOptionHolder() {
        return this;
    }

    /** The relation query expression (first command-line argument). */
    @Argument(index = 0)
    private String query;

    /** Input location: "-" for STDIN, otherwise a URL or a file path. */
    @Argument(index = 1)
    private String inputResource = "-";

    @Option(symbol = "t", description = "input file type. [silk, json, xml, tab, csv]")
    private FileType inputFileType = FileType.SILK;

    /** Delimiter placed between the values of a relation when it is printed. */
    @Option(symbol = "d", description = "column delimiter")
    public String splitChar = "\t";

    /**
     * Runs the query against the configured input and prints each matching
     * relation to standard output, with values joined by {@link #splitChar}.
     *
     * @param args
     *            unused by this method; it reads only the annotated fields
     * @throws XerialException
     *             with {@code MISSING_ARGUMENT} when no query was given
     * @throws Exception
     *             when the input cannot be opened or parsed, or when the query
     *             cannot be parsed
     */
    @Override
    public void execute(String[] args) throws Exception {
        if (query == null)
            throw new XerialException(XerialErrorCode.MISSING_ARGUMENT, "no query is given");

        final boolean readFromStdin = inputResource.equals("-");
        final Reader in = openInput(readFromStdin);
        try {
            RelationExpr expr = RelationExpr.parse(query);
            QuerySet qs = expr.buildQuerySet();
            _logger.debug("query set: " + qs);

            TreeParser treeInput = createParser(in);

            RelationExtractor.run(qs, treeInput, new RelationHandler() {
                public void relation(Schema s, Tuple<Node> relation) {
                    TupleFormatter f = new TupleFormatter();
                    relation.accept(f);
                    System.out.println(StringUtil.join(f.elem, splitChar));
                }
            });
        }
        finally {
            // Only release readers this command opened itself; the STDIN
            // wrapper is left open for the rest of the process.
            if (!readFromStdin)
                closeQuietly(in);
        }
    }

    /**
     * Opens the reader for {@link #inputResource}. For a named resource the
     * file type is also re-detected from the resource name.
     */
    private Reader openInput(boolean readFromStdin) throws Exception {
        if (readFromStdin) {
            Reader stdin = new InputStreamReader(new StandardInputStream());
            _logger.debug("read data from STDIN");
            return stdin;
        }

        // NOTE(review): this replaces whatever was given with -t by the type
        // detected from the resource name -- confirm that this is intended.
        inputFileType = FileType.getFileType(inputResource);

        if (startsWithProtocol(inputResource)) {
            URL inputURL = new URL(inputResource);
            _logger.debug("read from resource: " + inputResource);
            return new BufferedReader(new InputStreamReader(inputURL.openStream()));
        }

        _logger.debug("read from file: " + inputResource);
        return new BufferedReader(new FileReader(inputResource));
    }

    /**
     * Creates the tree parser matching {@link #inputFileType}; unsupported
     * types fall back to the Silk parser after logging a warning.
     */
    private TreeParser createParser(Reader in) throws Exception {
        switch (inputFileType) {
        case SILK:
            return new SilkParser(in, SilkEnv.newEnv(inputResource));
        case XML:
            return new XMLTreeParser(in);
        case JSON:
            return new JSONTreeParser(in);
        case TAB:
            return new TabAsTreeParser(in);
        case CSV:
            return new TabAsTreeParser(in, ',');
        // TODO fasta file support
        // case FASTA:
        // break;
        default:
            _logger.warn("unknown file type: " + inputFileType);
            _logger.warn("using silk type, instead");
            inputFileType = FileType.SILK;
            // Actually build the fallback parser; previously no parser was
            // created here and a null parser was handed to the extractor.
            return new SilkParser(in, SilkEnv.newEnv(inputResource));
        }
    }

    /** Closes the reader, logging (rather than propagating) a failure to close. */
    private static void closeQuietly(Reader in) {
        try {
            in.close();
        }
        catch (IOException e) {
            // A close failure must not hide an exception thrown while querying.
            _logger.warn("failed to close the input: " + e.getMessage());
        }
    }

    /**
     * Collects the values of all nodes reachable from a tuple, in visiting
     * order; a node without a value contributes an empty string.
     */
    private static class TupleFormatter implements TupleVisitor<Node>
    {
        ArrayList<String> elem = new ArrayList<String>();

        public void visitNode(Node node) {
            elem.add(node.nodeValue == null ? "" : node.nodeValue);
        }

        public void visitTuple(Tuple<Node> tuple) {
            // Start at index 0 so that the first column is not dropped.
            for (int i = 0; i < tuple.size(); ++i) {
                tuple.get(i).accept(this);
            }
        }
    }

    /**
     * Tells whether a resource name starts with a protocol prefix such as
     * {@code http://} or {@code file://}.
     *
     * @param resourceName
     *            the resource name to test; may be {@code null}
     * @return {@code true} if the name begins with one or more ASCII letters,
     *         digits or '+' followed by "://"; {@code false} otherwise,
     *         including for {@code null}
     */
    public static boolean startsWithProtocol(String resourceName) {
        if (resourceName == null)
            return false;
        return PROTOCOL_PREFIX.matcher(resourceName).matches();
    }
}