Package net.sourceforge.align.parser

Examples of net.sourceforge.align.parser.Parser


      throw new UnknownParameterException("class");
    }

    filter = FilterDecorators.decorate(filter);

    Parser parser = new AlParser(getIn());
    Formatter formatter = new AlFormatter(getOut());
    List<Alignment> alignmentList = parser.parse();
    alignmentList = filter.apply(alignmentList);
    formatter.format(alignmentList);
  }
View Full Code Here


        if (commandLine.getArgs().length < 1) {
          throw new WrongArgumentCountException("1, 2, 3, ...", commandLine.getArgs().length);
        }
        for (String fileName : commandLine.getArgs()) {
          Reader reader = getReader(getFileInputStream(fileName));
          Parser parser = new AlParser(reader);
          List<Alignment> currentAlignmentList = parser.parse();
          alignmentList.addAll(currentAlignmentList);
          reader.close();
        }
      } else if (cls.equals("txt")) {
        if ((commandLine.getArgs().length % 2) != 0) {
          throw new WrongArgumentCountException("2, 4, 6, ...", commandLine.getArgs().length);
        }
        for (int i = 0; i < commandLine.getArgs().length; i += 2) {
          String sourceFileName = commandLine.getArgs()[i];
          String targetFileName = commandLine.getArgs()[i + 1];
          Reader sourceReader = getReader(getFileInputStream(sourceFileName));
          Reader targetReader = getReader(getFileInputStream(targetFileName));
          Parser parser = new PlaintextParser(sourceReader, targetReader);
          List<Alignment> currentAlignmentList = parser.parse();
          alignmentList.addAll(currentAlignmentList);
          sourceReader.close();
          targetReader.close();
        }
      } else if (cls.equals("tmx")) {
        if (commandLine.getArgs().length < 1) {
          throw new WrongArgumentCountException("1, 2, 3, ...", commandLine.getArgs().length);
        }
        String languages = commandLine.getOptionValue('l');
        String[] languageArray;
        if (languages == null) {
          languageArray = new String[0];
        } else {
          languageArray = languages.split(",");
          if (languageArray.length != 2) {
            throw new ParameterFormatException("languages");
          }
        }
        for (String fileName : commandLine.getArgs()) {
          Reader reader = getReader(getFileInputStream(fileName));
          Parser parser;
          if (languageArray.length == 0) {
            parser = new TmxParser(reader);
          } else {
            parser = new TmxParser(reader, languageArray[0],
                languageArray[1]);
          }
          List<Alignment> currentAlignmentList = parser.parse();
          alignmentList.addAll(currentAlignmentList);
          reader.close();
        }
       
      } else {
View Full Code Here

      Writer writer = getSingleWriter(commandLine);
      formatter = new InfoFormatter(writer);
    } else {
      throw new UnknownParameterException("class");
    }
    Parser parser = new AlParser(getIn());
    List<Alignment> alignmentList = parser.parse();
    formatter.format(alignmentList);
  }
View Full Code Here

    if (cls == null) {
      throw new MissingParameterException("class");
    }
    ModifyAlgorithm sourceAlgorithm;
    ModifyAlgorithm targetAlgorithm = null;
    Parser parser = new AlParser(getIn());
    List<Alignment> alignmentList = null;
    if (cls.equals("split-word")) {
      sourceAlgorithm = new WordSplitAlgorithm();
    } else if (cls.equals("split-sentence")) {
      sourceAlgorithm = new SentenceSplitAlgorithm();
    } else if (cls.equals("split-paragraph")) {
      sourceAlgorithm = new ParagraphSplitAlgorithm();
    } else if (cls.equals("split-srx")) {
      String fileName = commandLine.getOptionValue('f');
      if (fileName == null) {
        throw new MissingParameterException("file");
      }
      String languages = commandLine.getOptionValue('l');
      if (languages == null) {
        throw new MissingParameterException("languages");
      }
      String[] languageArray = languages.split(",");
      if (languageArray.length != 2) {
        throw new ParameterFormatException("languages");
      }
      Reader reader = getReader(getFileInputStream(fileName));
      sourceAlgorithm = new SrxSplitAlgorithm(reader, languageArray[0]);
      reader = getReader(getFileInputStream(fileName));
      targetAlgorithm = new SrxSplitAlgorithm(reader, languageArray[1]);
    } else if (cls.equals("merge")) {
      String separator = commandLine.getOptionValue('s');
      if (separator == null) {
        sourceAlgorithm = new SeparatorMergeAlgorithm();
      } else {
        separator = separator.replaceAll("\\\\t", "\t");
        separator = separator.replaceAll("\\\\n", "\n");
        sourceAlgorithm = new SeparatorMergeAlgorithm(separator);
      }
    } else if (cls.equals("trim")) {
      sourceAlgorithm = new TrimCleanAlgorithm();
    } else if (cls.equals("lowercase")) {
      sourceAlgorithm = new LowercaseCleanAlgorithm();
    } else if (cls.equals("filter-non-words")) {
      sourceAlgorithm = new FilterNonWordsCleanAlgorithm();
    } else if (cls.equals("unify-rare-words")) {
      alignmentList = parser.parse();
      Pair<ModifyAlgorithm, ModifyAlgorithm> algorithmPair =
        createUnifyRareWordsAlgorithms(commandLine, alignmentList);
      sourceAlgorithm = algorithmPair.first;
      targetAlgorithm = algorithmPair.second;
    } else {
      throw new UnknownParameterException("class");
    }
    String part = commandLine.getOptionValue('p');
    if (part == null) {
      part = "both";
    }
    if (part.equals("both")) {
      if (targetAlgorithm == null) {
        targetAlgorithm = sourceAlgorithm;
      }
    } else if (part.equals("source")) {
      targetAlgorithm = new NullModifyAlgorithm();
    } else if (part.equals("target")) {
      sourceAlgorithm = new NullModifyAlgorithm();
    } else {
      throw new UnknownParameterException("part");
    }
   
    Formatter formatter = new AlFormatter(getOut());
   
    Filter filter = new Modifier(sourceAlgorithm, targetAlgorithm);
    filter = FilterDecorators.decorate(filter);

    if (alignmentList == null) {
      alignmentList = parser.parse();
    }
   
    alignmentList = filter.apply(alignmentList);
    formatter.format(alignmentList);
  }
View Full Code Here

      SplitAlgorithm splitAlgorithm =
        VocabularyUtil.DEFAULT_TOKENIZE_ALGORITHM;
      Vocabulary sourceVocabulary = new Vocabulary();
      Vocabulary targetVocabulary = new Vocabulary();
     
      Parser parser = new AlParser(getIn());
      List<Alignment> alignmentList = parser.parse();

      List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
      List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();
      for (Alignment alignment : alignmentList) {
        sourceWidList.add(tokenizePutGet(
View Full Code Here

      throw new UnknownParameterException("class");
    }
   
    filter = FilterDecorators.decorate(filter);
   
    Parser parser = new AlParser(getIn());
    Formatter formatter = new AlFormatter(getOut());
    List<Alignment> alignmentList = parser.parse();
    alignmentList = filter.apply(alignmentList);
    formatter.format(alignmentList);
  }
View Full Code Here

    formatter.format(alignmentList);
  }

  private List<Alignment> loadAlignmentList(String fileName) {
    Reader reader = getReader(getFileInputStream(fileName));
    Parser parser = new AlParser(reader);
    return parser.parse();
  }
View Full Code Here

  protected void run(CommandLine commandLine) {
    if (commandLine.getOptions().length == 0) {
      throw new MissingParameterException("class");
    }
    Filter filter;
    Parser parser = new AlParser(getIn());
    List<Alignment> alignmentList = null;
    String cls = commandLine.getOptionValue('c');
    if (cls == null) {
      throw new MissingParameterException("class");
    }
    if (cls.equals("unify")) {
      String unificationCorpus = commandLine.getOptionValue('u');
      if (unificationCorpus == null) {
        throw new MissingParameterException("unification-corpus");
      }
      List<Alignment> unificationAlignmentList =
        loadAlignmentList(unificationCorpus);
      filter = new UnifyAligner(unificationAlignmentList);
    } else {
      alignmentList = parser.parse();
      AlignAlgorithm algorithm = createAlgorithm(commandLine, alignmentList);
      filter = new Aligner(algorithm);
    }
    filter = FilterDecorators.decorate(filter);
    Formatter formatter = new AlFormatter(getOut());
    if (alignmentList == null) {
      alignmentList = parser.parse();
    }
    alignmentList = filter.apply(alignmentList);
    formatter.format(alignmentList);
  }
View Full Code Here

        targetVocabulary);
  }
 
  private List<Alignment> loadAlignmentList(String fileName) {
    Reader reader = getReader(getFileInputStream(fileName));
    Parser parser = new AlParser(reader);
    return parser.parse();
  }
View Full Code Here

   
    String leftFileName = commandLine.getArgs()[0];
    String rightFileName = commandLine.getArgs()[1];
    Reader leftReader = getReader(getFileInputStream(leftFileName));
    Reader rightReader = getReader(getFileInputStream(rightFileName));
    Parser leftParser = new AlParser(leftReader);
    Parser rightParser = new AlParser(rightReader);
   
    int width = createInt(commandLine, "width", PresentationFormatter.DEFAULT_WIDTH);

    Formatter formatter = new PresentationFormatter(getErr(), width);

    boolean showDiff = commandLine.hasOption('d');

    List<Alignment> leftAlignmentList = leftParser.parse();
    List<Alignment> rightAlignmentList = rightParser.parse();

    Diff diff = compare(leftAlignmentList, rightAlignmentList);
   
    if (showDiff) {
     
View Full Code Here

TOP

Related Classes of net.sourceforge.align.parser.Parser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.