Package org.xerial.silk.weaver.cui

Source Code of org.xerial.silk.weaver.cui.Find

/*--------------------------------------------------------------------------
*  Copyright 2011 Taro L. Saito
*
*  Licensed under the Apache License, Version 2.0 (the "License");
*  you may not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*--------------------------------------------------------------------------*/
//--------------------------------------
// XerialJ
//
// Find.java
// Since: 2011/02/09 15:30:09
//
// $URL$
// $Author$
//--------------------------------------
package org.xerial.silk.weaver.cui;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.ArrayList;

import org.xerial.core.XerialErrorCode;
import org.xerial.core.XerialException;
import org.xerial.json.JSONTreeParser;
import org.xerial.lens.relation.Node;
import org.xerial.lens.relation.Tuple;
import org.xerial.lens.relation.TupleVisitor;
import org.xerial.lens.relation.lang.RelationExpr;
import org.xerial.lens.relation.query.QuerySet;
import org.xerial.lens.relation.query.RelationExtractor;
import org.xerial.lens.relation.query.RelationHandler;
import org.xerial.lens.relation.schema.Schema;
import org.xerial.silk.SilkEnv;
import org.xerial.silk.SilkParser;
import org.xerial.util.FileType;
import org.xerial.util.StringUtil;
import org.xerial.util.io.StandardInputStream;
import org.xerial.util.log.Logger;
import org.xerial.util.opt.Argument;
import org.xerial.util.opt.Command;
import org.xerial.util.opt.Option;
import org.xerial.util.text.TabAsTreeParser;
import org.xerial.util.tree.TreeParser;
import org.xerial.xml.XMLTreeParser;

public class Find implements Command
{
    private static Logger _logger = Logger.getLogger(Find.class);

    @Override
    public String name() {
        return "find";
    }

    @Override
    public String getOneLineDescription() {
        return "find relation from Silk data";
    }

    @Override
    public Object getOptionHolder() {
        return this;
    }

    @Argument(index = 0)
    private String   query;

    @Argument(index = 1)
    private String   inputResource = "-";

    @Option(symbol = "t", description = "input file type. [silk, json, xml, tab, csv]")
    private FileType inputFileType = FileType.SILK;

    @Option(symbol = "d", description = "column delimiter")
    public String    splitChar     = "\t";

    @Override
    public void execute(String[] args) throws Exception {
        if (query == null)
            throw new XerialException(XerialErrorCode.MISSING_ARGUMENT, "no query is given");

        // input
        Reader in = null;
        if (inputResource.equals("-")) {
            in = new InputStreamReader(new StandardInputStream());
            _logger.debug("read data from STDIN");
        }
        else {
            inputFileType = FileType.getFileType(inputResource);

            if (startsWithProtocol(inputResource)) {
                URL inputURL = new URL(inputResource);
                _logger.debug("read from resource: " + inputResource);
                in = new BufferedReader(new InputStreamReader(inputURL.openStream()));
            }
            else {
                _logger.debug("read from file: " + inputResource);
                in = new BufferedReader(new FileReader(inputResource));
            }
        }

        RelationExpr expr = RelationExpr.parse(query);
        QuerySet qs = expr.buildQuerySet();
        _logger.debug("query set: " + qs);

        TreeParser treeInput = null;

        switch (inputFileType) {
        case SILK:
            treeInput = new SilkParser(in, SilkEnv.newEnv(inputResource));
            break;
        case XML:
            treeInput = new XMLTreeParser(in);
            break;
        case JSON:
            treeInput = new JSONTreeParser(in);
            break;
        case TAB:
            treeInput = new TabAsTreeParser(in);
            break;
        case CSV:
            treeInput = new TabAsTreeParser(in, ',');
            break;
        // TODO fasta file support
        //        case FASTA:
        //            break;
        default:
            _logger.warn("unknown file type: " + inputFileType);
            _logger.warn("using silk type, instead");
            inputFileType = FileType.SILK;
            break;
        }

        RelationExtractor.run(qs, treeInput, new RelationHandler() {

            public void relation(Schema s, Tuple<Node> relation) {
                TupleFormatter f = new TupleFormatter();
                relation.accept(f);

                System.out.println(StringUtil.join(f.elem, splitChar));
            }
        });

    }

    private static class TupleFormatter implements TupleVisitor<Node>
    {

        ArrayList<String> elem = new ArrayList<String>();

        public void visitNode(Node node) {
            elem.add(node.nodeValue == null ? "" : node.nodeValue);
        }

        public void visitTuple(Tuple<Node> tuple) {
            for (int i = 1; i < tuple.size(); ++i) {
                tuple.get(i).accept(this);
            }
        }
    }

    public static boolean startsWithProtocol(String resourceName) {
        if (resourceName == null)
            return false;

        return resourceName.matches("^[A-za-z0-9+]+://.*");
    }

}
TOP

Related Classes of org.xerial.silk.weaver.cui.Find

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.