Package de.fuberlin.wiwiss.d2rq.csv

Source Code of de.fuberlin.wiwiss.d2rq.csv.TranslationTableParser

package de.fuberlin.wiwiss.d2rq.csv;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.hp.hpl.jena.n3.IRIResolver;

import de.fuberlin.wiwiss.d2rq.D2RQException;
import de.fuberlin.wiwiss.d2rq.map.TranslationTable.Translation;

/**
* Parses the contents of a CSV file into a collection of
* <tt>Translation</tt>s. The CSV file must contain exactly
* two columns. DB values come from the first, RDF values
* from the second.
*
* @author Richard Cyganiak (richard@cyganiak.de)
*/
public class TranslationTableParser {
  private Log log = LogFactory.getLog(TranslationTableParser.class);
  private BufferedReader reader;
  private CSV csvLineParser = new CSV();
  private String url;

  public TranslationTableParser(Reader reader) {
    this.reader = new BufferedReader(reader);
  }
 
  public TranslationTableParser(String url) {
    try {
      this.url = new IRIResolver().resolve(url);;
      this.reader = new BufferedReader(new FileReader(new File(new URI(this.url))));
    } catch (FileNotFoundException fnfex) {
      throw new D2RQException("File not found at URL: " + this.url);
    } catch (URISyntaxException usynex) {
      throw new D2RQException("Malformed URI: " + this.url);
    }
  }
 
  public Collection<Translation> parseTranslations() {
    try {
      List<Translation> result = new ArrayList<Translation>();
      while (true) {
        String line = this.reader.readLine();
        if (line == null) {
          break;
        }
        String[] fields = this.csvLineParser.parse(line);
        if (fields.length != 2) {
          this.log.warn("Skipping line with " +
              fields.length + " instead of 2 columns in CSV file " + this.url);
          continue;
        }
        result.add(new Translation(fields[0], fields[1]));
      }
      return result;
    } catch (IOException iex) {
      throw new D2RQException(iex);
    }
  }
}
TOP

Related Classes of de.fuberlin.wiwiss.d2rq.csv.TranslationTableParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.